From 9933cec0a18ae2a3d752f269d1bb12c19f51199d Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Sun, 21 Jul 2013 15:35:02 -0400 Subject: IRC. --- hurd/debugging/rpctrace.mdwn | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'hurd/debugging') diff --git a/hurd/debugging/rpctrace.mdwn b/hurd/debugging/rpctrace.mdwn index a5c1a6e9..1570df4c 100644 --- a/hurd/debugging/rpctrace.mdwn +++ b/hurd/debugging/rpctrace.mdwn @@ -182,6 +182,10 @@ See `rpctrace --help` about how to use it. uhu, there's a TODO just above that assertion :) +* IRC, freenode, #hurd, 2013-07-05 + + wish: make rpctrace decode the results of io_stat rpcs + # See Also -- cgit v1.2.3 From eccdd13dd3c812b8f0b3d046ef9d8738df00562a Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Wed, 25 Sep 2013 21:45:38 +0200 Subject: IRC. --- community/gsoc/2013/hacklu.mdwn | 1482 +++++++++++ community/gsoc/2013/nlightnfotis.mdwn | 2587 ++++++++++++++++++++ .../gsoc/project_ideas/download_backends.mdwn | 11 +- community/gsoc/project_ideas/mtab/discussion.mdwn | 1167 ++++++++- community/gsoc/project_ideas/object_lookups.mdwn | 29 + community/gsoc/project_ideas/sound/discussion.mdwn | 47 + contributing.mdwn | 3 + contributing/discussion.mdwn | 68 +- faq/still_useful.mdwn | 2 +- faq/system_port.mdwn | 24 + glibc/signal/signal_thread.mdwn | 45 +- hurd/debugging/rpctrace.mdwn | 39 + hurd/libstore.mdwn | 37 +- hurd/libstore/part.mdwn | 133 +- hurd/running/debian/dhcp.mdwn | 97 +- hurd/subhurd.mdwn | 376 ++- hurd/translator.mdwn | 5 +- hurd/translator/eth-filter.mdwn | 26 + hurd/translator/examples.mdwn | 8 +- hurd/translator/exec.mdwn | 8 +- hurd/translator/ext2fs.mdwn | 63 + hurd/translator/fifo.mdwn | 48 + hurd/translator/hostmux.mdwn | 15 + hurd/translator/httpfs.mdwn | 100 + hurd/translator/nsmux.mdwn | 27 +- hurd/translator/pfinet.mdwn | 9 +- hurd/translator/pfinet/implementation.mdwn | 164 ++ hurd/translator/pflocal.mdwn | 28 +- hurd/translator/proc.mdwn | 22 + hurd/translator/procfs/jkoenig/discussion.mdwn | 10 + hurd/translator/ufs.mdwn | 38 + libpthread.mdwn | 2 +- microkernel/discussion.mdwn | 40 +- microkernel/l4.mdwn | 12 +- microkernel/mach/concepts.mdwn | 17 + microkernel/mach/deficiencies.mdwn | 66 + microkernel/mach/documentation.mdwn | 13 +- .../mach/gnumach/hardware_compatibility_list.mdwn | 5 + microkernel/mach/message/msgh_id.mdwn | 24 + microkernel/mach/mig.mdwn | 3 +- microkernel/mach/mig/documentation.mdwn | 21 +- microkernel/mach/mig/structured_data.mdwn | 119 - open_issues/anatomy_of_a_hurd_system.mdwn | 143 ++ open_issues/arm_port.mdwn | 53 + open_issues/boehm_gc.mdwn | 92 +- open_issues/clock_gettime.mdwn | 10 + open_issues/cloud.mdwn | 49 + open_issues/crash_server.mdwn | 61 +- open_issues/dbus.mdwn | 175 +- open_issues/dde.mdwn | 144 ++ open_issues/device_drivers_and_io_systems.mdwn | 6 + open_issues/exec.mdwn | 49 +- open_issues/exec_leak.mdwn | 57 - open_issues/exec_memory_leaks.mdwn | 54 +- open_issues/fakeroot_eagain.mdwn | 4 +- open_issues/gccgo.mdwn | 11 +- open_issues/gdb.mdwn | 43 + open_issues/glibc.mdwn | 627 ++++- open_issues/glibc/debian.mdwn | 39 +- open_issues/glibc/t/tls-threadvar.mdwn | 40 + open_issues/hurd_init.mdwn | 216 ++ open_issues/libnetfs_argument_parsing.mdwn | 62 - open_issues/libnetfs_passive_translators.mdwn | 55 + open_issues/libnetfs_vs_libdiskfs.mdwn | 118 + open_issues/libpthread.mdwn | 2 +- .../libpthread/t/fix_have_kernel_resources.mdwn | 217 ++ open_issues/libpthread_dlopen.mdwn | 16 +- open_issues/llvm.mdwn | 29 + open_issues/mach_migrating_threads.mdwn | 88 +- 
.../memory_object_model_vs_block-level_cache.mdwn | 243 +- open_issues/mig_portable_rpc_declarations.mdwn | 113 +- open_issues/mig_stub_functions.mdwn | 41 + open_issues/nptl.mdwn | 22 +- open_issues/pthread_atfork.mdwn | 13 +- open_issues/resource_management_problems.mdwn | 51 +- open_issues/robustness.mdwn | 44 +- open_issues/secure_file_descriptor_handling.mdwn | 5 +- open_issues/systemd.mdwn | 933 +++++++ ..._spin_lock_locked_ss_critical_section_lock.mdwn | 33 +- open_issues/time.mdwn | 762 +++++- open_issues/tmux.mdwn | 24 + open_issues/virtualization/fakeroot.mdwn | 43 + open_issues/virtualization/networking.mdwn | 72 +- public_hurd_boxen.mdwn | 2 +- public_hurd_boxen/sceen.mdwn | 10 +- 85 files changed, 11578 insertions(+), 333 deletions(-) create mode 100644 community/gsoc/project_ideas/sound/discussion.mdwn create mode 100644 hurd/translator/eth-filter.mdwn create mode 100644 hurd/translator/fifo.mdwn create mode 100644 hurd/translator/httpfs.mdwn create mode 100644 hurd/translator/ufs.mdwn delete mode 100644 microkernel/mach/mig/structured_data.mdwn create mode 100644 open_issues/cloud.mdwn delete mode 100644 open_issues/exec_leak.mdwn create mode 100644 open_issues/hurd_init.mdwn delete mode 100644 open_issues/libnetfs_argument_parsing.mdwn create mode 100644 open_issues/libnetfs_passive_translators.mdwn create mode 100644 open_issues/libnetfs_vs_libdiskfs.mdwn create mode 100644 open_issues/mig_stub_functions.mdwn create mode 100644 open_issues/tmux.mdwn (limited to 'hurd/debugging') diff --git a/community/gsoc/2013/hacklu.mdwn b/community/gsoc/2013/hacklu.mdwn index d0185c60..b7de141b 100644 --- a/community/gsoc/2013/hacklu.mdwn +++ b/community/gsoc/2013/hacklu.mdwn @@ -615,3 +615,1485 @@ In context of [[open_issues/libpthread/t/fix_have_kernel_resources]]: found that. hacklu: That's how I found it, yes. tschwinge: :) + + +# IRC, freenode, #hurd, 2013-07-14 + + hi. what is a process's msgport? + And where can I find the msg_sig_post_untraced_request()? + (msg_sig_post* in [hurd]/hurd/msg_defs) + this is my debugger demo code + https://github.com/hacklu/HDebugger.git use make test to run the demo. I + put a breakpoint before the second printf in hello_world(inferior + program). but I can't resume execution from that. + could somebody give me some suggestions? thanks so much. + hacklu: % make test + make: *** No rule to make target `exc_request_S.c', needed by + `all'. Stop. + teythoon: updated, forget to git add that file . + hacklu_: cool, seems to work now + will look into this tomorrow :) + exit + teythoon: not work. the code can,t resume from a breakpoint + + +# IRC, freenode, #hurd, 2013-07-15 + + hi, this is my weekly + report. http://hacklu.com/blog/gsoc-weekly-report4-148/ + sadly to unsolve the question of resume from breakpoint. + hacklu: have you tried to figure out what gdb does to resume a + process? + teythoon: hi. em, I have tried, but haven't find the magic in gdb + yet. + have you tried rpctrace'ing gdb? + no, rpctrace has too many noise. I turned on the debug in gdb. + I don't want rpctrace start gdb as its child task. if it can + attach at some point instead of at start + hacklu: you don't need to use gdb interactively, you could pipe + some commands to it + teythoon: that sounds a possible way. I am try it, thank you + youpi: gdb can't work correctlly with rpctrace even in batch + mode. 
+ get something like this "rpctrace: get an unknown send right from + process 2151" + hacklu: well, ideally, fix rpctrace ); + ;) + hacklu: but you can also as on the list, perhaps somebody knows + what you need + ok. + or I should debug gdb more deeply. + do both + so either of them may win first + + braunr: I have found that, if there is no exception appears, the + signal thread will not be createed. Then there is only one thread in the + task. + + +# IRC, freenode, #hurd, 2013-07-17 + + braunr: ping + hacklu__: yes ? + I have reply your email + i don't understand + "I used this (&_info)->suspend_count to get the sc value." + before the thread_info call ? + no, after the call + but you have a null pointer + the info should be returned in info, not _info + strange thing is the info is a null pointer. but _info not + _info isn't a pointer, that's why + the kernel will use it if the data fits, which is usually the case + in the begin , the info=&_info. + and it will dynamically allocate memory if it doesn't + yes + info should still have that value after the call + but the call had change it. this is what I can;t understand. + are you completely sure err is 0 on return ? + since the parameter is a pointer to pointer, the thread_info can + change it , but I don't think it is a good ideal to set it to null + pointer without any err . + yes. i am sure + info_len is wrong + it should be the number of integers in _info + i.e. sizeof(_info) / sizeof(unsigned int) + i don't think that's the problem though + yes, THREAD_BASIC_INFO_COUNT is already exactly that + hm not exactly + yes, exactly in fact + I try to set it by hand, not use the macro. + the macro is already defined as #define THREAD_BASIC_INFO_COUNT + (sizeof(thread_basic_info_data_t) / sizeof(natural_t)) + the info_len is 13. I checked. + so, i said something wrong + the call doesn't reallocate thread_info + it uses the provided storage, nothing else + yes, your call is wrong + use thread_info (thread->port, THREAD_BASIC_INFO, (int *) info, + &info_len); + em. thread_info (thread->port, THREAD_BASIC_INFO, (int *) &info, + &info_len); + &info would make the kernel erase the memory where info (the + pointer) was stored + info, not &info + or &_info directly + i don't see the need for an intermediate pointer here + ideally, avoid the cast + but in gnu-nat.c line 3338, it use &info. + use a union with both thread_info_data_t and + thread_basic_info_data_t + well, try it my way + i think they're wrong + ok, you are right, use info it is ok. the value is the same as + &_info after the call. + but the suspend_count is zero again. + check the rest of the result to see if it's consistent + I think this line need a patch. + what you mean the rest of the result? + the thread info + run_state, sleep_time, creation_time + see if they make sense + ok, I try to dump it + bbl + braunr: thread [118] suspend_count=0 + run_state=3, flags=1, sleep_time=0, + creation_time.second=1374079641 + something like this, seems no problems. + + +# IRC, freenode, #hurd, 2013-07-18 + + how to get the thread state from TH_STATE_WAITING to + TH_STATE_RUNNING + hacklu__: + http://www.gnu.org/software/hurd/gnumach-doc/Thread-Execution.html#Thread-Execution + hacklu__: ah waiting + hacklu__: this means the thread is waiting for an event + so probably waiting for a message + or an internal kernel event + braunr: so I need to send it a message. I think I maybe forget + to send some reply message. 
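+
+A minimal sketch of the `task_threads` / `thread_info` usage worked out in the
+exchange above -- this is not code from HDebugger or GDB; `dump_threads` is a
+hypothetical helper, `task` is assumed to be a task port obtained elsewhere
+(for example via `proc_pid2task`), and error handling is kept to a bare
+minimum:
+
+    #include <stdio.h>
+    #include <error.h>
+    #include <mach.h>
+    #include <mach/thread_info.h>
+
+    /* Print the run state and suspend count of every thread in TASK.  */
+    static void
+    dump_threads (task_t task)
+    {
+      thread_array_t threads;
+      mach_msg_type_number_t nthreads, i;
+      kern_return_t err;
+
+      err = task_threads (task, &threads, &nthreads);
+      if (err)
+        error (1, err, "task_threads");
+
+      for (i = 0; i < nthreads; i++)
+        {
+          thread_basic_info_data_t info;
+          /* Pass the buffer itself (not a pointer to the pointer), and
+             describe its size in integers via THREAD_BASIC_INFO_COUNT.  */
+          mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
+
+          err = thread_info (threads[i], THREAD_BASIC_INFO,
+                             (thread_info_t) &info, &count);
+          if (!err)
+            printf ("thread %u: run_state=%d suspend_count=%d sleep_time=%d\n",
+                    i, info.run_state, info.suspend_count, info.sleep_time);
+          mach_port_deallocate (mach_task_self (), threads[i]);
+        }
+
+      /* The thread array itself is returned as out-of-line memory.  */
+      vm_deallocate (mach_task_self (), (vm_address_t) threads,
+                     nthreads * sizeof *threads);
+    }
+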
+ hacklu__: i'm really not sure about those low level details + confirm before doing anything + the gdb has called msg_sig_post_untraced_request(), I don't get + clear about this function, I just call it as the same, maybe I am wrong . + how will if I send a CONT to the stopped process? maybe I should + try this. + when the inferior is in waiting + status(TH_STATE_WAITING,suspend_count=0), I use kill to send a CONT. then + the become(TH_STATE_STOP,suspend_count=1). when I think I am near the + success,I call thread_resume(),inferior turn out to be (TH_STATE_WAITING, + suspend_count=0). + so yes, probably waiting for a message + braunr: after send a CONT to the inferior, then send a -9 to the + debugger, the inferior continue!!! + probably because it was notified there wasn't any sender any more + that's funny, I will look deep into thread_resume and kill + (gdb being the sender here) + in hurd, when gdb attach a inferior, send signal to the + inferior, who will get the signal first? the gdb or the inferior? + quite differnet with linux. seems the inferior get first + do you mean gdb catches its own signal through ptrace on linux ? + kkk + ? + + +# IRC, freenode, #hurd, 2013-07-20 + + braunr: yeah, on Linux the gdb catch the signal from inferior + before the signal handler. And that day my network was broken, I can't + say goodbye to you. sorry for that. + + +# IRC, freenode, #hurd, 2013-07-22 + + hi all, this is my weekly + report. http://hacklu.com/blog/gsoc-weekly-report5-152/ + good to hear that you got the resume issue figured out + teythoon: thanks :) + hacklu: so your next step is to port gdbserver to hurd? + yep, I am already begin to. + before the mid-evaluate, I must submit something. I am far behind + my personal expections + hacklu: You've made great progress! Sorry, for not being able + to help you very much: currently very busy with work. :-| + hacklu: Working on gdbserver now is fine. I understand you + have been working on HDebugger to get an understanding of how everyting + works, outside of the huge GDB codebase. It's of course fine to continue + working on HDebugger to test things, etc., and that also counts very much + for the mid-term evaluation, so nothing to worry about. :-) + but I have far away behind my application on GSOC. I haven't + submit any patches. is it ok? + hacklu: Don't worry. Before doing the actual work, things + always look much simpler than they will be. So I was expecting/planning + for that. + The Hurd system is complex, with non-trivial and sometimes + asynchronous communication between the different components, and so it + takes some time to get an understanding of all that. + yes, I haven't get all clear about the signal post. that's too + mazy. + hacklu: It surely is, yes. + tschwinge: may you help me to understand the msg_sig_post(). I + don't want to understand all details now, but I want to get the _right_ + understanding of the gerneral. + as I have mentioned on my weekly report, gdb is listening on the + inferior's exception port, then gdb post a signal to that port. That + says: gdb post a message to herself, and handle it. is this right? + tschwinge: [gdb]/gdb/gnu-nat.c (line 1371), and + [glibc]/hurd/hurdsig.c(line 1390) + hacklu: My current understanding is that this is a "real" + signal that is sent to the debugged process' signal thread (msgport), and + when that process is resumed, it will process that signal. 
+ hacklu: This is different from the Mach kernel sending an + exception signal to a thread's exception port, which GDB is listening to. + Or am I confused? + is the msgport equal the exception port? + in my experience, when the thread haven't cause a exception, the + signal thread will not be created. after the exception occured, the + signal thread is come out. so somebody create it, who dose? the mach + kernel? + hacklu: My understanding is that the signal thread would always + be present, because it is set up early in a process' startup. + but when I call task_threads() before the exception appears, only + on thread returned. + "Interesting" -- another thing to look into. + hacklu: Well, you must be right: GDB must also be listening to + the debugged process' msgport, because otherwise it wouldn't be able to + catch any signals the process receives. Gah, this is all too complex. + tschwinge: that's maybe not. gdb listening on the task's exception + port, and the signal maybe handle by the signal thread if it could + handle. otherwise the signal thread pass the exception to the task's + exception port where gdb catched. + hacklu: Ah, I think I now get it. But let me first verify... + ;-) + + something strange. I have write a program to check whether create + signal threads at begining, the all created! + tschwinge: this is my test code and + result. http://pastebin.com/xtM6DUnG + cat test.c + #define _GNU_SOURCE 1 + #include + #include + #include + #include + #include + int main(int argc,char** argv) + { + mach_port_t task_port; + thread_array_t threads[5]; + mach_msg_type_number_t num_threads[5]; + error_t err; + task_port = mach_task_self(); + int i; + int j; + for(i=0;i<5;i++) + if(task_port){ + err = task_threads(task_port,&threads[i],&num_threads[i]); + if(err) + printf("err\n"); + } + for(i=0;i<5;i++){ + printf("===============\n"); + printf("has %d threads now\n",num_threads[i]); + for(j=0;j tschwinge: the result is different with HDebugger case. + + hacklu: It is my understanding that the two sig_post_untraced + RPC calls in inf_signal indeed are invoked on the real msgport (signal + thread) if the debugged process. + That port is retrieved via the + INF_MSGPORT_RPC/INF_RESUME_MSGPORT_RPC macro, which invoked + proc_getmsgport on the proc server, and that will return (unless + overridden by proc_setmsgport, but that isn't done in GDB) the msgport as + set by [glibc]/hurd/hurdinit.c:_hurd_new_proc_init or _hurd_setproc. + inf_signal is called from gnu_resume, which is via + [target_ops]->to_resume is called from target.c:target_resume, which is + called several places, for example infrun.c:resume which is used to a) + just resume the debugged process, or b) resume it and have it handle a + Unix signal (such as SIGALRM, or so), when using the GDB command »signal + SIGALRM«, for example. + So such a signal would then not be intercepted by GDB itself. + By the way, this is all just from reading the code -- I hope I + got it all right. + + Another thing: In Mach 3 Kernel Principles, the standard + sequence described on pages 22, 23 is thread_suspend, thread_abort, + thread_set_state, thread_resume, so you should probably do that in + HDebugger too, and not call thread_set_state before. + I would hope the GDB code also follows the standard sequence? + Can you please check that? + + The one thing I'm now confused about is where/how GDB + intercepts the standard setup (probably in glibc's signaling mess?) so + that it receives any signals raised in the debugged process. 
+ But I'll have to continue later. + + tschwinge: thanks for your detail answers. I don't realize that + the gnu_resume will resume for handle a signal, much thanks for point + this:) + tschwinge: I am not exactly comply with when I call thread_set_state. but I have called a + task_suspend before. I think it's not too bad:) + hacklu___: Yes, but be aware that gnu_resume is only relevant + if a signal is to be forwarded to the debugged process (to be handled + there), but not for the case where GDB intercepts the signal (such as + SIGSEGV), and handles it itself without then forwarding it to the + application. See the »info signals« GDB command. + I also confused about when to start the signal thread. I will + do more experiment. + I have found this: when the inferior is stop at a breakpoint, I + use kill to send a CONT to it, the HDebugger will get this message who + listening on the exception port. + + +# IRC, freenode, #hurd, 2013-07-28 + + how to understand the rpctrace output? + like this. 142<--143(pid15921)->proc_mark_stop_request (19 0) + 125<--1 + 27(pid-1)->msg_sig_post_request (20 5 task108(pid15919)); + what is the (pid-1)? the kernel? + 1 is /hurd/init + pid-1 not means minus 1? + ah, funny, you're right... I dunno then + 2 is the kernel though + the 142<--143 is port name? + could very well be, but I'm not sure, sorry + the number must be the port name. + anyone knows why /hurd/init does not get dead name notifications + for /hurd/exec like it does for any other essential server? + as far as I can see it successfully asks for them + about rpctrace, it poses as the kernel for its children, parses + and relays any messages sent over the childrens message port, right? + + +# IRC, freenode, #hurd, 2013-07-29 + + hi. this is my weekly + report. http://hacklu.com/blog/gsoc-weekly-report6-156/ + hacklu_: the inferior voluntarily stops itself if it gets a + signal and notifies its tracer? + yes + what if it chose not to do so? undebugable program? + debugged program will be set an flag so called + hurdsig_traced. normal program will handle the signal by himself. + in my env, I found that when GDB attach a running program, gdb + will not catch the signal send to the program. May help me try it? + it doesn't? I'll check... + hacklu_: yes, you're right + you can just gdb a loop program, and kill -CONT to it. If I do + this I will get "Can't wait for pid 12332:NO child processes" warning. + yes, I noticed that too + does gdb reparent the tracee? + I don't think this is a good behavior. gdb should get inferior's + signal + absolutely + In linux it does, not sure about hurd. but I think it should. + definitively. there is proc_child in process.defs, but that may + only be used once to set the parent of a process + gdb doesn't set the inferior as its child process if attached a + running procss in HURD. + + hacklu_: So you figured out this tracing/signal stuff. Great! + tschwinge: Hi. not exactly. + as I have mentioned, gdb can't get signal when attach to a + running process. + I also want to know how to build glibc in hurd. I have got this " + relocation error: ./libc.so: symbol _dl_find_dso_for_object, version + GLIBC_PRIVATE not defined in file ld.so.1 with link time reference" when + use LD_PRELOAD=./my_build_glibc/libc.so + hacklu: You can't just preload the new libc.so, but you'll also + need to use the new ld.so. Have a look at [glibc-build]/testrun.sh for + how to invoke these properly. 
Or, link with + »-Wl,-dynamic-linker=[glibc-build]/elf/ld.so,-rpath,[glibc-build]:[glibc-build]/elf + -L [glibc-build] -L [glibc-build]/elf«. If using the latter, I suggest + to also add »-Wl,-t« to verify that you're linking against the correct + libraries, and »ldd + [executable]« to verify that [€xecutable] will load the correct + libraries when invoked. + I will try that, and I can't find this call + pthread_cond_broadcast(). which will called in the proc_mark_stop + hacklu: Oh, right, you'll also need to add libpthread (I think + that's the directory name?) to the rpath and -L commands. + is libpthread a part of glibc or hurd? + glibc + hacklu: it is a different repository available here + http://git.savannah.gnu.org/cgit/hurd/libpthread.git/ + tschwinge: thanks for that, but I don't think I need help about + the comiler error now, it just say missing some C file. I will look into + the Makefile to verify. + but I think it's a part of glibc as a whole + hacklu: OK. + glibc is/was a stand-alone package and library, but in Debian + GNU/Hurd is nowadays integrated into glibc's build process. + NlightNFotis: thanks. I only add hurd, glibc, gdb,mach code to my + cscope file. seems need to add libpthread. + hacklu: If you use the Debian glibc package, our libpthread + will be in the libpthread subdirectory. + Ignore nptl, which is used for the Linux kernel. + tschwinge:BTW, I have found that, to continue the inferior from a + breakpoint, doesn't need to call msg_sig_post_untraced. just call + thread_abort and thread_resume is already ok. + I get the glibc from http://git.savannah.gnu.org/cgit/hurd. + hacklu: That sounds about right, because you want the inferior + to continue normally, instead of explicitly sending a (Unix) signal to + it. + hacklu: I suggest you use: »apt-get source eglibc« on your Hurd + system. + hacklu: The Savannah repository does not yet have libpthread + integrated. I have this on my TODO list... + tschwinge: no, apt-get source doesn't work in my Hurd. I got any + code from git clone *** + you most probably lack the deb-src entry in your sources.list + hacklu: Do you have deb-src lines in /etc/apt/source-list? Or + how does it fail? + tschwinge: I have deb-src lines. and apt-get complain that: E: + Unable to find a source package for eglibc or E: Unable to find a source + package for glibc + hacklu: which deb-src lines do you have? + and piece of my source_list : deb + http://ftp.debian-ports.org/debian unreleased main deb-src + http://ftp.debian-ports.org/debian unreleased main + you also need a deb-src line with the main archive + deb-src http://cdn.debian.net/debian unstable main + hacklu: Oh, hmm. And you did run »apt-get update« before? + That aside, there also is + that you can use. You'll need the *.dsc and *.debian.tar.xz files + corresponbding to your version of glibc, and the *.orig.tar.xz file. And + then run »dpkg-source -x *.dsc«. + The Debian snapshot is often very helpful if you need source + packages that are no longer in the main Debian repository. + or simply running dget on the dsc url + Oh. Good to know. + e.g. dget + http://cdn.debian.net/debian/pool/main/e/eglibc/eglibc_2.17-7.dsc + the network is slowly. and I am in apt-get update. + I will be away from this evening until sunday, too + what the main difference between the source site? + is dget means wget? + no + not exist in linux? + it does, in devscripts + it's a debian tool + oh, yes, I have installed devscripts. + I have got the libphread code, thanks. 
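+
+The "standard sequence" from the Mach 3 Kernel Principles that was brought up
+above (thread_suspend, thread_abort, thread_set_state, thread_resume) looks
+roughly like this.  A hedged sketch only, not the code GDB or HDebugger
+actually uses: `resume_with_new_pc` is a made-up helper, and patching `eip`
+merely marks the spot where a debugger would adjust the saved state -- as
+noted above, thread_abort plus thread_resume is already enough when the state
+does not need to change:
+
+    #include <mach.h>
+    #include <mach/thread_status.h>
+
+    /* Stop THREAD, abort any kernel operation it is blocked in (e.g. a
+       mach_msg), optionally patch its saved user state, and set it
+       running again.  */
+    static kern_return_t
+    resume_with_new_pc (thread_t thread, vm_address_t new_pc)
+    {
+      struct i386_thread_state state;
+      mach_msg_type_number_t count = i386_THREAD_STATE_COUNT;
+      kern_return_t err;
+
+      err = thread_suspend (thread);
+      if (err)
+        return err;
+
+      /* Without this, the saved state may describe a thread half-way
+         through a system call or message send and must not be touched.  */
+      err = thread_abort (thread);
+
+      if (!err)
+        err = thread_get_state (thread, i386_THREAD_STATE,
+                                (thread_state_t) &state, &count);
+      if (!err)
+        {
+          state.eip = new_pc;   /* e.g. step back over a breakpoint  */
+          err = thread_set_state (thread, i386_THREAD_STATE,
+                                  (thread_state_t) &state,
+                                  i386_THREAD_STATE_COUNT);
+        }
+      if (!err)
+        err = thread_resume (thread);
+      return err;
+    }
+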
+ + teythoon: the simple fact that this msg thread exists to receive + requests and that these requests are sent by ps and procfs is a potential + DoS + braunr: but does that mean that on Hurd a process can prevent a + debugger from intercepting signals? + teythoon: yes + that's not a problem for interactive programs + it's part of the hurd design that programs have limited trust in + each other + a user can interrupt his debugger if he sees no activity + that's more of a problem for non interactive system stuff like + init scripts + or procfs + why gdb can't get inferior's signal if attach a running process? + hacklu: try to guess + braunr: it is not a reasonable thing. I always think it should + catch the signal. + hacklu: signals are a unix thing built on top of mach + hacklu: think in terms of ports + all communication on the hurd goes through ports + but when use gdb to start a process and debugg it, this way, gdb + can catch the signal + hacklu: my guess is : + when starting a process, gdb can act as a proxy, much like + rpctrace + when attaching, it can't + braunr: ah, my question should ask like this: why gdb can't set + the inferior as its child process when attaching it? or it can not ? + hacklu: i'm not sure, the proc server is one of the parts i know + the less + but again, i guess there is no facility to update the msg port of + a process in the proc server + check that before taking it as granted + braunr: aha, I alway think you know everything:) + braunr: There is: setmsgport or similar. + if there is one, gdb doesn't use it + hacklu: That is a good question -- I can't answer it off-hand, + but it might be possible (by setting the tracing flag, and such things). + Perhaps it's just a GDB bug, which omits to do that. Perhaps just a + one-line code change, perhaps not. That's a new bug (?) report that we + may want to have a look at later on. + hacklu: But also note, this new problem is not really related + to your gdbserver work -- but of course you're fine to have a look at it + if you'd like to. + I just to ask for whether this is a normal behavior. this is + related to my gdbserver work, as gdbserver also need to attach a running + process... + gdbserver can start a process just like gdb does + you may want to focus on that first + Yes. + Attaching to processes that are already running is, I think, + always more complicated compared to the case where GDB/gdbserver has + complete control about the inferior right from the beginning. + yes, I am only focus on start one. the attach way I haven't + research now. + hacklu: That's totally fine. You can just say that attaching + to processes is not supported yet. + that's sound good:) + Ther will likely be more things in gdbserver that you won't be + able to easily support, so it's fine to do it step-by-step. + And then later add more features incrementally. + That's also easier for reviewing the patches. + + and one more question I have ask yestoday. what is the rpctrace + output (pid-1) mean? + hacklu: Another thing I can't tell off-hand. I'll try to look + it up. + hacklu, tschwinge: my theory is that it is in fact an error + message, maybe the proc server did not now a pid for the task + hacklu: utsl + tschwinge: for saving your time, I will look the code myself, I + don;t think this is a real hard question need you to help me by reading + the source code. 
+ teythoon, hacklu: Yes, from a quick inspection it looks like + task2pid returning a -1 PID -- but I can't tell yet what that is supposed + to mean, if it's an actualy bug, or just means there is no data + available, or similar. + braunr: utsl?? + hacklu: http://www.catb.org/~esr/jargon/html/U/UTSL.html + tschwinge: thank you. braunr like say abbreviation which I can't + google out. + hacklu: Again, if this affects your work, it is fine to have a + look at that presumed rpctrace problem, if not, it is fine to have a look + at it if you'd like to, and otherwise, we'll file it as a possible bug to + be looked at laster. + hacklu: Now you learned that one. :-) + tschwinge: ok , this doesn't affect me now. If I have time I will + figure out it. + + btw, what about the copyright assignment process? + teythoon, hacklu: You still haven't heard from the FSF about + your copyright assignments? What's the latest you have heard? + tschwinge: I have wrote a emali to ask for that, but no reply. + tschwinge: last and only response I got was on July 1st, the + last ping with explicit request for confirmation was on July the 12th + hacklu: When did you send this email? + tschwinge: last week. + teythoon: I suggest you send another inquiry, and please put me + in CC. And if there'S no answer within a couple days (well, I'm away + until Monday...), I'll follow up. + hacklu: Likewise for you; depending on when exactly ;-) you + sent the last email. (Always allow for a few days until you exect an + answer, but if nothing happend within a week for such rather simple + administrative tasks, better ask again, unfrotunately.) + tschwinge:ok , I will email more + + how to understand the asyn RPC? + hacklu: hm ? + for instance, [hurd]/proc/main.c proc_server is loop in listening + message. and handle it by message_demuxer. + but when I send a request like proc_wait_request() to it, will it + block in the message_demuxer? + and where is the function of + ports_manage_port_operations_multithread()? + this one is in libports + it's the last thing a server calls after bootstrapping itself + message_demuxer normally blocks, yes + but it's not "async" + the names seems the proc_server is listening message with many + threads? + every server in the hurd does + threads are created by ports_manage_port_operations_multithread + when incoming messages can't be processed quick enough by the set of + already existing threads + if too many task send request to the server, will it ddos? + yes + every server but /hurd/init + (and /hurd/hello) + hacklu: that's, in my opinion, a major design defect + yes, that is reasonable. + that's what causes what i like to call thread storms on message + floods ... :) + my hurd clone is intended to address such major issues + couldn't that be migitated by some kind of heuristic? + it already is .. + I don't image that the port_manage_port_operations_multithread + will dynamically create threads. I thought the server will hang if all + work thread is in use. + that would also be a major defect + creating as many threads as necessary is a good thing + the problem is the dos + hacklu: btw, ddos is "distributed" dos, and it doesn't really + apply to what can happen on the hurd + why not ? as far as I known, the message transport is + transparent. hurd has the chance to be DDOSed + we don't care about the distributed property of the dos + oh, I know what you mean. 
+ it simply doesn't matter + on thread calling select in an event loop with a low timeout (high + frequency) on a bunch of file descriptors is already enough to generate + many dead-name notifications + Oh! Based on what I've read in GDB source code, I thought the + proc server was single-threaded. However, it no longer is, after 1996's + Hurd commit fac6d9a6d59a83e96314103b3181f6f692537014. + those notifications cause message flooding at servers (usually + pflocal/pfinet), which spawn a lot of threads to handle those messages + one* thread + tschwinge: ah, the comment in gnu_nat.c is out of date! + hacklu: and please, please, clean the hello_world processes you're + creating on darnassus + i had to do it myself again :/ + braunr: [hacklu@darnassus ~]$ ps ps: No applicable processes + ps -eflw + htop + hacklu: Probably the proc_wait_pid and proc_waits_pending stuff + could be simplified then? (Not an urgent issue, of course, will file as + an improvement for later.) + braunr: ps -eflw |grep hacklu + 1038 12360 10746 26 26 2 87 22 148M 1.06M 97:21001 S + p1 0:00.00 grep --color=auto hacklu + 15:08 < braunr> i had to do it myself again :/ + braunr: so as a very common special case, a lot of dead name + notifications cause problems for pf*? + and use your numeric uid + teythoon: yes + braunr: I am so sorry. I only used ps to check. forgive me + teythoon: simply put, a lot of messages cause problems + select is one special use case + braunr: blocking other requests? + the other is page cache writeback + creating lots of threads + potentially deadlocking on failure + and in the case of writebacks, simply starving + braunr: but dead name notifications should mostly trigger + cleanup actions, couldn't those be handled by a different thread(pool) + than the rest? + that's why you can bring down a hurd system with a simple cp + bigfile somewhere, bigfile being a few hundreds MiBs + teythoon: it doesn't change the problem + threads are per task + and the contention would remain the same + hm + since dead-name notifications are meant to release resources + created by what would then be "regular" threads + don't worry, there is a solution + it's simple + it's well known + it's just hard to directly apply to the hurd + and impossible to enforce on mach + tschwinge: I am confuzed after I have look into S_proc_wait() + [hurd/proc/wait.c], it has relate pthread_hurd_cond_wait_np. I can't find + out when it will return. And the signal is report to the debuger by + S_proc_wait. + braunr: a pointer please ;) + teythoon: basically, synchronous ipc + then, enforcing one server thread per client thread + and replace mach-generated notifications with messages sent from + client threads + the only kind of notification required by the hurd are no-senders + notifications + this happens when a client releases all references it has to a + resource + so it's easy to make that synchronous as well + trying to design RPCs as closely as system calls on monolithic + kernels helps in viewing how this works + the only real additions are address space crossing, and capability + invocation + sounds reasonable, why is it hard to apply to the hurd? most + rpcs are synchonous, no? + mach ipc isn't + braunr: When client C send a request to server S, but doesn't wait + for the reply message right now, for a while, C call mach_msg to recieve + reply. Can I think this is a synchronous RPC? 
+ a malicious client can still overflow message queues + hacklu: no + yes, I can see how this is impossible to enforce, but still we + could all try to play nice :) + teythoon: no + :) + async ipc is heavy, error-prone, less performant than sync ipc + some async ipc is necessary to handle asynchronous events, but + something like unix signals is actually a lot more appropriate + we're diverging from the gsoc though + don't waste too much time on that + 15:13 < braunr> it's just hard to directly apply to the hurd + I wont + why is it hard + almost everything is synchronous on the hurd + except a few critical bits + signals :) + and select + and pagecache writebacks + fixing those parts require some work + which isn't trivial + for example, select should be rewritten not to use dead-name + notifications + adding a light weight signalling mechanism to mach and using + that instead of async ipc? + instead of destroying ports once an event has been received, it + should (synchyronously) remove the requests installed at remote servers + uh no + well maybe but that would be even harder + hacklu: This (proc/wait.c) is related to POSIX thread + cancellation -- I don't think you need to be concerned about that. That + function's "real" exit points are earlier above. + teythoon: do you understand what i mean about select ? + ^^ is that a no go area? + for now it is + we don't want to change the mach interface too much + yes, I get the point about select, but I haven't looked at its + implementation yet + tschwinge: when I want to know the child task's state, I call + proc_wait_request(), unless the child's state not change. the + S_proc_wait() will not return? + it creates ports, puts them in a port set, gives servers send + rights so they can notify about events + y not? it's not that hurd is portable to another mach, or is it? + and is there another that we want to be compatible with? + when an event occurs, all ports are scanned + then destroyed + on destruction, servers are notified by mach + the problem is that the client is free to continue and make more + requests while existing select requests are still being cancelled + uh, yeah, that sounds like a costly way of notifying somewone + the cost isn't the issue + select must do something like that on a multiserver system, you + can't do much about it + but it should be synchronous, so a client can't make more requests + to a server until the current select call is complete + and it shouldn't use a server approach at the client side + client -> server should be synchronous, and server -> client + should be asynchronous (e.g. using a specific SIGSELECT signal like qnx + does) + this is a very clean way to avoid deadlocks and denials of service + yes, I see + qnx actually provides excellent documentation about these issues + and their ipc interface is extremely simple and benefits from + decades of experience on the subject + hacklu: This function implements the POSIX wait call, and per + »man 2 wait«: »The wait() system call suspends execution of the calling + process until one of its children terminates.« + hacklu: This is implemented in glibc in sysdeps/posix/wait.c, + sysdeps/unix/bsd/bsd4.4/waitpid.c, sysdeps/mach/hurd/wait4.c, by invoking + this RPC synchronously. + hacklu: GDB on the other hand, uses this infrastructure (as I + understand it) to detect (that is, to be informed) when a debuggee exits + (that is, when the inferior process terminates). 
+ hacklu: Ah, so maybe I miss-poke earlier: the + pthread_hurd_cond_wait_np implements the blocking. And depending on its + return value the operation will be canceled or restarted (»start_over«). + s%maybe%% + hacklu: Does this information help? + tschwinge: proc_wait_request is not only to detect the inferior + exit. it also detect the child's state change + as tschwinge said, it's wait(2) + tschwinge: and I have see this, when kill a signal to inferior, + the gdb will get the message id=24120 which come from S_proc_wait + braunr: man 2 wait says: wait, waitpid, waitid - wait for process + to change state. (in linux, in hurd there is no man wait) + uh + there is, it's the linux man page :) + make sure you have manpages-dev installed + I always think we are talk about linux's manpage :/ + but regardless the manpage, gdb really call proc_wait_request() to + detect whether inferior's changed states + in any case, keep in mind the hurd is intended to be a posix + system + which means you can always refer to what wait is expected to do + from the posix spec + see + http://pubs.opengroup.org/onlinepubs/9699919799/functions/wait.html + braunr: even in the manpags under hurd, man 2 wait also says: wait + for process to change state. + yes + that's what it's for + what's the problem ? + the problem is what tschwinge has said I don't understand. like + and per »man 2 wait«: »The wait() system call suspends execution of the + calling process until one of its children terminates.« + terminating is a form of state change + historically, wait was intended to monitor process termination + only + so the thread become stoped wait also return + afterwards, process tracing was added too + what ? + so when the child state become stopped, the wait() call will + return? + yes + and I don't know this pthread_hurd_cond_wait_np. + wait *blocks* until the process it references changes state + pthread_hurd_cond_wait_np is the main blocking function in hurd + servers + well, pthread_hurd_cond_timedwait_np actually + all blocking functions end up there + (or in mach_msg) + (well pthread_hurd_cond_timedwait_np calls mach_msg too) + since I use proc_wait_request to get the state change, so the + thread in proc_server will be blocked, not me. is that right? + no + both + this is just a request, why should block me? + because you're waiting for the reply afterwards + or at least, you should be + again, i'm not familiar with those parts + after call proc_wait_request(), gdb does a lot stuffs, and then + call mach_msg to recieve reply. + ok + I think it will be blocked only in mach_msg() if need. + usually, xxx_request are the async send-only versions of RPCs + Yes, that'S my understanding too. + and xxx_reply the async receive-only + so that makes sense + so I have ask you is it a asyn RPC. + yes + 15:18 < hacklu> braunr: When client C send a request to server S, + but doesn't wait for the reply message right now, for a while, C call + mach_msg to recieve reply. Can I think this is a synchronous RPC? + 15:19 < braunr> hacklu: no + if it's not synchronous, it's asynchronous + sorry, I spell wrong. missing a 'a' :/ + S_proc_wait_reply will then be invoked once the procserver + actually answers the "blocking" proc_wait call. + Putting "blocking" in quotes, because (due to the asyncoronous + RPC invocation), GDB has not actually blocked on this. + well, it doesn't call proc_wait + tschwinge: yes, the S_proc_wait_reply is called by + process_reply_server(). + tschwinge: so the "blocked" one is the thread in proc_server . 
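+
+Reduced to a sketch, the asynchronous request/reply pattern being discussed
+here looks as follows (the real code is gnu-nat.c's gnu_wait and its
+S_proc_wait_reply callback).  The two prototypes are written out by hand for
+illustration and are assumptions; the authoritative versions are whatever MIG
+generates from process.defs and process_reply.defs.  `event_port` is assumed
+to be a receive right the caller owns, and `wait_for_event` is a hypothetical
+helper:
+
+    #include <mach.h>
+    #include <sys/wait.h>
+    #include <hurd.h>
+
+    /* Assumed MIG-generated prototypes -- check the generated headers
+       (GDB uses process_reply_S.h and friends) for the real ones.  */
+    kern_return_t proc_wait_request (process_t process, mach_port_t reply,
+                                     pid_t pid, int options);
+    boolean_t process_reply_server (mach_msg_header_t *in,
+                                    mach_msg_header_t *out);
+
+    static void
+    wait_for_event (process_t proc, mach_port_t event_port, pid_t pid)
+    {
+      struct
+      {
+        mach_msg_header_t hdr;
+        char data[1024];
+      } msg, reply;
+      kern_return_t err;
+
+      /* Send-only half: queue the request at the proc server and return
+         immediately.  The caller is not blocked; only a thread inside the
+         proc server sits in pthread_hurd_cond_wait_np waiting for the
+         child to change state.  */
+      err = proc_wait_request (proc, event_port, pid, WUNTRACED);
+      if (err)
+        return;
+
+      /* ... do other work here ...  */
+
+      /* Receive-only half: block in mach_msg until something arrives on
+         EVENT_PORT, then let the generated demuxer decode it.  The result
+         is delivered to a S_proc_wait_reply callback that we implement.  */
+      err = mach_msg (&msg.hdr, MACH_RCV_MSG, 0, sizeof msg,
+                      event_port, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+      if (!err)
+        process_reply_server (&msg.hdr, &reply.hdr);
+    }
+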
+ braunr: Right. »It requests the proc_wait service.« + gdb will also block on mach_msg + 16:05 < braunr> both + braunr: yes, if gdb doesn't call mach_msg to recieve reply it will + not be blocked. + i expect it will always call mach_msg + right ? + braunr: yes, but before it call mach_msg, it does a lot other + things. but finally will call mach_msg + that's ok + that's the kind of things asynchronous IPC allows + tschwinge: I have make a mistake in my week report. The signal + recive by inferior is notified by the proc_server, not the + send_signal. Because the send_singal send a SIGCHLD to gdb's msgport not + gdbself. That make sense. + + +# IRC, freenode, #hurd, 2013-07-30 + + braunr: before I go to sleep last night, this question pop into my + mind. How do you find my hello_world is still alive on darnassus? The + process is not a CPU-heavy or IO-heavy guy. You will not feel any + performance penalization. I am so curious :) + hacklu: have you looked into patching the proc server to allow + reparenting of processes? + teythoon:not yet + hacklu: i've familiarized myself with proc in the last week, + this should get you started nicely: http://paste.debian.net/19985/ + diff --git a/proc/mgt.c b/proc/mgt.c + index 7af9c1a..a11b406 100644 + --- a/proc/mgt.c + +++ b/proc/mgt.c + @@ -159,9 +159,12 @@ S_proc_child (struct proc *parentp, + if (!childp) + return ESRCH; + + + /* XXX */ + if (childp->p_parentset) + return EBUSY; + + + /* XXX if we are reparenting, check permissions. */ + + + mach_port_deallocate (mach_task_self (), childt); + + /* Process identification. + @@ -176,6 +179,7 @@ S_proc_child (struct proc *parentp, + childp->p_owner = parentp->p_owner; + childp->p_noowner = parentp->p_noowner; + + + /* XXX maybe need to fix refcounts if we are reparenting, not sure */ + ids_rele (childp->p_id); + ids_ref (parentp->p_id); + childp->p_id = parentp->p_id; + @@ -183,11 +187,14 @@ S_proc_child (struct proc *parentp, + /* Process hierarchy. Remove from our current location + and place us under our new parent. Sanity check to make sure + parent is currently init. */ + - assert (childp->p_parent == startup_proc); + + assert (childp->p_parent == startup_proc); /* XXX */ + if (childp->p_sib) + childp->p_sib->p_prevsib = childp->p_prevsib; + *childp->p_prevsib = childp->p_sib; + + + /* XXX we probably want to keep a reference to the old + + childp->p_parent around so that if the debugger dies or detaches, + + we can reparent the process to the old parent again */ + childp->p_parent = parentp; + childp->p_sib = parentp->p_ochild; + childp->p_prevsib = &parentp->p_ochild; + the code doing the reparenting is already there, but for now it + is only allowed to happen once at process creation time + teythoon: good job. This is in my todo list, when I implement + attach feature to gdbserver I will need this + hacklu: i use htop + braunr: why is that process so disruptive? + the big problem with those stale processes is that they're in a + state that prevents one important script to complete + there is a bug on the hurd with regard to terminals + when you log out of an ssh session, the terminal remains open for + some reason (bad reference counting somewhere, but it's quite tricky to + identify) + to work around the issue, i have a cron job that calls a script to + kill unused terminals + this works by listing processes + your hello_world processes block that listing + uh, how so? + braunr: ok. I konw. 
+ teythoon: probably the denial of service we were talking about + yesterday + select flooding a server? + no, a program refusing to answer on its msg port + ps has an option -M : + -M, --no-msg-port Don't show info that uses a process's + msg port + the problem is that my script requires those info + ah, I see, right + hacklu being working on gdb, it's not surprising he's messing with + that + yes indeed. couldn't ps use a timeout to detect that? + braunr: yes, once I have found ps will hang when I has run + hello_world in a breakpoint state. + braunr: thanks for explaining the issue, i always wondered why + that process is such big a deal ;) + teythoon: how do you tell between processes being slow to answer + and intentionnally refusing to answer ? + a timeout is almost never the right solution + sometimes it's the only solution though, like for networking + but on a system running on a local machine, there is usually + another way + braunr: I don't of course + ? + ah ok + it was rethorical :) + yes I know, and I was implying that I wasn't expecting a timeout + to be the clean solution + and the current behaviour is hardly acceptable + i agree + it's ok for interactive cases + you can use Ctrl-C, which uses a 3 seconds delay to interrupt the + client RPC if nothing happens + braunr: btw, what about *_reply.defs? Should I add a + corresponding reply simpleroutine if I add a routine? + normally yes + right, forgot about that + so that the procedure ids are kept in sync in case one wants to + do this async at some point in the future? + yes + this happened with select + i had to fix the io interface + ok, noted + + +# IRC, freenode, #hurd, 2013-07-31 + + Do we need write any other report for the mid-evaluation? I have + only submit a question-answer to google. + + +# IRC, freenode, #hurd, 2013-08-05 + + hi, this is my weekly + report. http://hacklu.com/blog/gsoc-weekly-report7build-gdbserver-on-gnuhurd-164/ + youpi: can you show me some suggestions about how to design the + interface and structure of gdbserver? + hacklu: well, I've read your blog entry, I was wondering about + tschwinge's opinion, that's why I asked whether he was here + I would tend to start from an existing gdbserver, but as I haven't + seen the code at all, I don't know how much that can help + so you mean I shoule get a worked gdbserver then to improve it? + I'd say so, but again it's not a very strong opinion + I'd rather let tschwinge comment on this + youpi: ok :) + + how about the copyright assignments? did hacklu or teythoon receive + any answer? + youpi: I did, the copyright clerk told me that he finally got my + papers and that everything is in order now + few! + s/f/ph + teythoon: you mean all steps are supposed to be done now, or is he + doing the last steps? I don't see your name in the copyright folder yet + youpi: well, he said that he had the papers and they are about + to be signed + teythoon: ok, so it's not finished, that's why your name is not on + the list yet + this paper stuff is really a pain + youpi: I haven't got any answer from FSF now. + did you ping them recently? + I have pinged 2 week ago. + what you mean of ping? I just write an email to him. Is it enough? + yes + + +# IRC, freenode, #hurd, 2013-08-12 + + hi, this is my weekly report + http://hacklu.com/blog/gsoc-weekly-report8-168/ . sorry for so late. 
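+
+To make the message port that keeps coming up in these logs a little more
+concrete (signals are delivered through it, and ps/procfs query it, which is
+why a process that refuses to service it can hang them): a client-side sketch
+of posting a signal the way kill does on the Hurd.  `post_signal` is a
+hypothetical helper with most error handling omitted; glibc's
+sysdeps/mach/hurd/kill.c is the authoritative version:
+
+    #define _GNU_SOURCE 1
+    #include <signal.h>
+    #include <hurd.h>
+    #include <hurd/process.h>
+    #include <hurd/msg.h>
+
+    /* Ask the proc server for PID's message port and task port, then
+       deliver SIGNO by posting a message to the signal thread listening
+       on that msgport.  The task port acts as the "refport" proving we
+       are allowed to signal the process.  */
+    static error_t
+    post_signal (pid_t pid, int signo)
+    {
+      process_t proc = getproc ();
+      mach_port_t msgport, task;
+      error_t err;
+
+      err = proc_getmsgport (proc, pid, &msgport);
+      if (err)
+        return err;
+
+      err = proc_pid2task (proc, pid, &task);
+      if (!err)
+        {
+          /* If the target never services its msgport, this call simply
+             hangs -- which is exactly what bites ps and procfs above.  */
+          err = msg_sig_post (msgport, signo, 0, task);
+          mach_port_deallocate (mach_task_self (), task);
+        }
+      mach_port_deallocate (mach_task_self (), msgport);
+      return err;
+    }
+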
+ + hacklu: it seems we misunderstood ourselves last week, I meant to + start from the existing gdbserver implementation + but never mind :) + starting from the lynxos version was a good idea + youpi: em... yeah, the lynxos port is so clean and simple. + + youpi: aha, the "Remote connection closed" problem has been fixed + after I add a init_registers_i386() and set the structure target_desc. + but I don't get understand with the structure target_desc. I only + know it is auto-generated which configured by the configure.srv. + Hi! + hacklu: In gdbserver, you should definitely re-use existing + infrastructure, especially anything that deals with the + protocol/communication with GDB (that is, server.c and its support + files). + hacklu: Then, for the x86 GNU Hurd port, it should be + implemented in the same way as an existing port. The Linux port is the + obvious choice, of course, but it is also fine to begin with something + simpler (like the LynxOS port you've chosen), and then we can still add + more features later on. That is a very good approach actually. + hacklu: The x86 GNU Hurd support will basically consist of + three pieces -- exactly as with GDB's native x86 GNU Hurd port: x86 + processor specific (tge existing gdbserver/i386-low.c etc. -- shouldn't + need any modifications (hopefully)), GNU Hurd specific + (gdbserver/gnu-hurd-low.c (or similar)), and x86 GNU Hurd specific + (gdbserver/gnu-hurd-x86-low.c (or similar)). + s%tge%the + tschwinge: now I have only add a file named gnu-low.c, I should + move some part to the file gnu-i386-low.c I think. + hacklu: That's fine for the moment. We can move the parts + later (everything with 86 in its name, probably). + that's ok. + tschwinge: Can I copy code from gnu-nat.c to + gdbserver/gnu-hurd-low.c? I think the two file will have many same code. + hacklu: That's correct. Ideally, the code should be shared + (for example, in a file in common/), but that's an ongoing discussion in + GDB, for other duplicated code. So, for the moment, it is fine to copy + the parts you need. + hacklu: Oh, but it may be a good idea to add a comment to the + source code, where it is copied from. + maybe I can do a common-part just for hurd gdb port. + That should make it easier later on, to consolidate the + duplicated code into one place. + Or you can do that, of course. If it's not too difficult to + do? + I think at the begining it is not difficult. But when the + gdbserver code grow, the difference with gdb is growing either. That will + be too many #if else. + I think we should check with the GDB maintainers, what they + suggest. + hacklu: Please send an email To: Cc: + , , and ask about + this: you need to duplicate code that already exists in gnu-nat.c for new + gdbserver port -- how to share code? + tschwinge: ok, I will send the email right now. + tschwinge: need I cc to hurd mail-list? + hacklu: Not really for that questions, because that is a + question only relevant to the GDB source code itself. + tschwinge: got it. + +[[!message-id +"CAB8fV=jzv_rPHP3-HQVBA-pCNZNat6PNbh+OJEU7tZgQdKX3+w@mail.gmail.com"]]. + + +# IRC, freenode, #hurd, 2013-08-19 + +. + + when and where is the best time and place to get the regitser + value in gdb? + well, I'm not sure to understand the question + you mean in the gdb source code, right? + isn't it already done in gdb? + probably similarly to i386? + (linux i386 I mean) + I don't find the fetch_register or relate function implement in + gnu-nat.c + so I can't make decision how to implement this in gdbserver. 
+ it's in i386gnu-nat.c, isn't it? + yeah. + does that answer your issue? + thank you. I am so stupid + + +# IRC, freenode, #hurd, 2013-08-26 + + < hacklu> hello everyone, this is my week + report. http://hacklu.com/blog/gsoc-weekly-report10-174/ + + < hacklu> btw, my FSF copyright assignment has been concepted. They guy + said, they have recived my mail for a while but forget to handle it. + + < hacklu> but now I face a new problem, when I typed the first continue + command, gdb will continue all the breakpoint, and the inferior will run + until normally exit. + + +# IRC, freenode, #hurd, 2013-08-30 + + tschwinge: hi, does gdb's attach feature work correctlly on Hurd? + on my hurd-box, the gdb can't attach to a running process, after a + attaching, when I continue, gdb complained "can't find pid 12345" + hacklu: attaching works, not sure why gdb is complaining + teythoon: yeah, it can attaching, but can't contine process. + in this case, the debugger is useless if it can't resume execution + hacklu: well, gdb on Linux reacts a little differently, but for + me attaching and then resuming works + teythoon: yes, gdb on linux works well. + % gdb --pid 21506 /bin/sleep + [...] + (gdb) c + Continuing. + warning: Can't wait for pid 21506: No child processes + # pkill -SIGILL sleep + warning: Pid 21506 died with unknown exit status, using SIGKILL. + yes. I used a sleep program to test too. + I believe that the warning and deficiencies with the signal + handling are b/c on Hurd the debuggee cannot be reparented to the + debugger + oh, I remembered, I have asked this before. + Confirming that attaching to a process in __sleep -> __mach_msg + -> mach_msg_trap works fine, but then after »continue«, I see »warning: + Can't wait for pid 4038: No child processes« and three times »Can't fetch + registers from thread bogus thread id 1: No such thread« and the sleep + process exits (normally, I guess? -- interrupted "system call"). + If detaching (exit GDB) instead, I see »warning: Can't modify + tracing state for pid 4041: No such process« and the sleep process exits. + Attaching to and then issueing »continue« in a process that is + not currently in a mach_msg_trap (tested a trivial »while (1);«) seems to + work. + hacklu: ^ + tschwinge: in my hurdbox, if I just attach a while(1), the system + is near down. nothing can happen, maybe my hardware is slow. + so I can only test on the sleep one. + my gdbserver doesn't support attach feature now. the other basic + feather has implement. I am doing test and review the code now. + Great! :-) + It is fine if attaching does not work currently -- can be added + later. + btw, How can I submit my code? put the patch in email directly? + Did you already run the GDB testsuite using your gdbserver? + no, haven't yet + Either that, or a Git branch to pull from. + I think I should do more review and test than I submit patches. + hacklu: See [GDB]/gdb/testsuite/boards/native-gdbserver.exp + (and similar files) for how to run the GDB testsuite with gdbserver. + ok. + But don't be disappointed if there are still a lot of failures, + etc. It'll already be great if some basic stuff works. + now it can set and remove breakpoint. show register, access + variables. + ... which already is enogh for a lot of debugging sessions. + :-) + I will continue to make it more powerful. + :) + Yes, but please first work on polishing the existing code, and + get it integrated upstream. That will be a great milestone. 
+ No doubt that GDB maintainers will have lots of comments about + proper formatting of the source code, and such things. Trivial, but will + take time to re-work and get right. + oh, I got it. I will give my pathch before this weekend. + Then once your basic gdbserver is included, you can continue to + implement additional features, piece by piece. + And then we can run the GDB testsuite with gdbserver and + compare that there are no regressions, etc. + Heh, »before the weekend« -- that's soon. ;-) + honestly to say, most of the code is copyed from other files, I + haven't write too many code myself. + Good -- this is what I hoped. Often, more time in software + development is spent on integrating existing things rathen than writing + new code. + but I have spent a lot of time to get known the code and to debug + it to work. + Thzis is normal, and is good in fact: existing code has already + been tested and documented (in theory, at least...). + Yes, that's expected too: when relying on/reusing existing + code, you first have to understand it, or at least its interfaces. Doing + that, you're sort of "mentally writing the existing code again". + So, this sounds all fine. :-) + your words make me happy. + :) + Well, I am, because this seems to be going well. + thank you. I am going to coding now~~ + + +# IRC, freenode, #hurd, 2013-09-02 + + hi, this is my weekly + report. http://hacklu.com/blog/gsoc-weekly-report11-181/ + + please give me any advice on how to use mig to generate stub-files + in gdbserver? + hacklu: + http://darnassus.sceen.net/gitweb/rbraun/slabinfo.git/blob/HEAD:/Makefile + braunr: shouldnt' I work like this + https://github.com/hacklu/gdbserver/blob/gdbserver/gdb/config/i386/i386gnu.mh + ? + hacklu: seems that you need server code + other than that i don't see the difference + gdb use autoconf to generate the Makefile, and part from the *.mh + file, but in gdbserver, there is no .mh like files. + hacklu: why can't you reuse /i386gnu.mh ? + braunr: question is that, there are something not need in + /i386gnu.mh. + hacklu: like what ? + braunr: like fork-child.o msg_U.o core-regset.o + hacklu: well, adjust the dependencies as you need + hacklu: do you mean they become useless for gdbserver but are + useful for gdb ? + braunr: yes, so I need another one gnu.mh file. + braunr: but the gdbserver's configure doesn't have any *.mh file, + can I add the first one? + or adjust the values of those variables depending on the building + mode + maybe + tschwinge is likely to better answer those questions + braunr: ok, I will wait for tschwinge's advice. + hacklu, The gdb/config/ dir is for files related to the native + gdb builds, as opposed to a cross gdb that does not have any native bits + in it. In the latter, gdbserver will be used to touch the native layer, + and GDB will only guide gdbserver through the debugging session... + hacklu, In case you haven't figured that out already. + luisgpm: I am not very clear with you. According to your words, I + shouldn't use gdb/config for gdbserver? + hacklu, Correct. You should use configure.srv for gdbserver. + hacklu, gdb/gdbserver/configure.srv that is. + hacklu, gdb/configure.tgt for non-native gdb files... + hacklu, and gdb/config for native gdb files. + hacklu, The native/non-native separation for gdb is due to the + possibility of having a cross gdb. + what's srv file purpose? + hacklu, gdbserver, on the other hand, is always native. + Doing the target-to-object-files mapping. 
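+
+For orientation, the glue every gdbserver port supplies, in the style of the
+lynx-low.c port that the work above is modeled on: an `initialize_low`
+function that hands the port's `struct target_ops` to the generic core.  This
+is a sketch only; `gnu_target_ops`, the `gnu_*` callbacks behind it and the
+breakpoint handling are placeholder names for illustration, not the actual
+submitted code:
+
+    #include "server.h"
+
+    /* Filled in elsewhere with the Hurd callbacks: gnu_create_inferior,
+       gnu_attach, gnu_resume, gnu_wait, gnu_fetch_registers, ...  */
+    extern struct target_ops gnu_target_ops;
+
+    /* x86 software breakpoint: the int3 opcode.  */
+    static const unsigned char x86_breakpoint[] = { 0xCC };
+
+    void
+    initialize_low (void)
+    {
+      /* Hook the low (native) layer into the generic gdbserver core
+         (server.c, remote-utils.c), which speaks the remote protocol.  */
+      set_target_ops (&gnu_target_ops);
+
+      /* Tell the core what byte sequence to plant for breakpoints.  */
+      set_breakpoint_data (x86_breakpoint, sizeof x86_breakpoint);
+    }
+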
+ how can I use configure.srv to config the MIG to generate + stub-files? + What are stub-files in this context? + On Hurd, some rpc stub file are auto-gen by MIG with *.defs file + luisgpm: c source code handling low level ipc stuff + mig is the mach interface generator + luisgpm, hacklu: If that is still helpful by now, in + + I described the MIG usage in GDB. (Which also states that ptrace is a + system call which it is not.) + hacklu: For the moment, it is fine to indeed copy the rules + related to MIG/RPC stubs from gdb/config/i386/i386gnu.mh to a (possibly + new) file in gdbserver. Then, later, we should work out how to properly + share these, as with all the other code that is currently duplicated for + GDB proper and gdbserver. + hacklu, tschwinge: If there is code gdbserver and native gdb can + use, feel free to put them inside gdb/common for now. + hacklu, luisgpm: Right, that was the conclusion from + . + tschwinge, luisgpm : ok, I got it. + tschwinge: sorry for haven't submit pathes yet, I will try to + submit my patch tomorrow. + +[[!message-id "CAB8fV=iw783uGF8sWyqJNcWR0j_jaY5XO+FR3TyPatMGJ8Fdjw@mail.gmail.com"]]. + + +# IRC, freenode, #hurd, 2013-09-06 + + If I want compile a file which is not in the current directory, + how should I change the Makefile. I have tried that obj:../foo.c, but the + foo.o will be in ../, not in the current directory. + As say, When I build gdbserver, I want to use [gdb]/gdb/gnu-nat.c, + How can I get the gnu-nat.o under gdbserver's directory? + tschwinge: ^^ + Hi! + hacklu: Heh, unexpected problem. + hacklu: How is this handled for the files that are already in + gdb/common/? I think these would have the very same problem? + tschwinge: ah. + I got it + I see, for example: + ./gdb/Makefile.in:linux-btrace.o: + ${srcdir}/common/linux-btrace.c + ./gdb/gdbserver/Makefile.in:linux-btrace.o: + ../common/linux-btrace.c $(linux_btrace_h) $(server_h) + If I have asked before, I won't use soft link to solve this. + But isn't that what you've been trying? + when this, where the .o file go to? + Yes, symlinks can't be used, because they're not available on + every (file) system GDB can be built on. + I would assume the .o files to go into the current working + directory. + Wonder why this didn't work for you. + in gdbserver/configure.srv, there is a srv_tgtobj="gnu_nat.c ..", + if I change the Makefile.in, it doesn't gdb's way. + So I can't use the variable srv_tgtobj? + That should be srv_tgtobj="gnu_nat.o [...]"? (Not .c.) + I have try this, srv_tgtobj="../gnu_nat.c", then the gnu_nat.o is + generate in the parent directory. + s/.c/.o + (wrong input) + For my understand now, I should set the srv_tgtobj="", and then + set the gnu_nat.o:../gnu_nat.c in the gdbserver/Makefile.in. right? + Hmm, I thought you'd need both. + Have you tried that? + no, haven't yet. I will try soon. + I have met an strange thing. I have this in Makefile, + i386gnu-nat.o:../i386gnu-nat.c $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $< + When make, it will complain that: no rules for target + i386gnu-nat.c + but I also have a line gnu-nat.o:../gnu-nat.c ../gnu-nat.h. this + works well. + hacklu: Does it work if you use $(srcdir)/../i386gnu-nat.c + instead of ../i386gnu-nat.c? + Or similar. + I have try this, i386gnu-nat.c: echo "" ; then it works. + (try $(srcdir) ing..) + make: *** No rule to make target `.../i386gnu-nat.c', needed by + `i386gnu-nat.o'. Stop. + seems no use. + tschwinge: I have found another thing, if I rename the + i386gnu-nat.o to other one, like i386gnu-nat2.o. 
It works! + + +# IRC, freenode, #hurd, 2013-09-07 + + hi, I have found many '^L' in gnu-nat.c, should I fix it or keep + origin? + hacklu: fix in what sense? + remove the line contains ^L + hacklu: see bottom of + http://www.gnu.org/prep/standards/standards.html#Formatting + hacklu: "Please use formfeed characters (control-L) to divide the + program into pages at logical places (but not within a function)." + hacklu: so unless a reason has come up to deviate from the gnu + coding standards, those ^L's are there by design + LarstiQ: Thank you! I always think that are some format error. I + am stupid. + hacklu: not stupid, you just weren't aware + * LarstiQ thought the same when he first encountered them + + +# IRC, freenode, #hurd, 2013-09-09 + + hacklu_, hacklu__: I don't know what tschwinge thinks, but I guess + you should work with upstream on integration of your existing work, this + is part of the gsoc goal: submitting one's stuff to projects + youpi: Which is what we're doing (see the patches recently + posted). :-) + ok + youpi: I always doing what you have suggest. :) + I have asked in my new mail, I want to ask at here again. Should + I change the gdb use lwp filed instead of tid field? There are + too many functions use tid. Like + named tid in the structure proc also. + make_proc(),inf_tid_to_thread(),ptid_build(), and there is a field + (sorry for the bad \n ) + and this is my weekly + report. http://hacklu.com/blog/gsoc-weekly-report12-186/ + And in Pedro Alves's reply, he want me to integration only one + back-end for gdb and gdbserver. but the struct target_obs are just + decalre different in both of the two. How can I integrate this? or I got + the mistaken understanding? + tschwinge: ^^ + hacklu: I will take this to email, so that Pedro et al. can + comment, too. + hacklu: I'm not sure about your struct target_ops question. + Can you replay to Pedro's email to ask about this? + tschwinge: ok. + hacklu: I have sent an email about the LWP/TID question. + tschwinge: Thanks for your email, now I know how to fix the + LWP/TID for this moment. + hacklu: Let's hope that Pedro also is fine with this. :-) + tschwinge: BTW, I have a question, if we just use a locally + auto-generated number to distignuish threads in a process, How can we do + that? + How can we know which thread throwed the exception? + I haven't thought about this before. + hacklu: make_proc sets up a mapping from Mach threads to GDB's + TIDs. And then, for example inf_tid_to_thread is used to look that up. + tschwinge: oh, yeah. that is. + + +# IRC, freenode, #hurd, 2013-09-16 + + hacklu: Even when waiting for Pedro (and me) to comment, I + guess you're not out of work, but can continue in parallel with other + things, or improve the patch? + tschwinge: honestly to say, these days I am out of work T_T after + I have update the patch. + I am not sure how to improve the patch beyond your comment in the + email. I have just run some testcase and nothing others. + hacklu: I have not yet seen any report on the GDB testsuite + results using your gdbserver port (see + gdb/testsuite/boards/native-gdbserver.exp). :-D + question is, the resule of that testcase is just how many pass how + many not pass. + and I am not sure whether need to give this information. + Just as a native run of GDB's testsuite, this will create *.sum + and *.log files, and these you can diff to those of a native run of GDB's + testsuite. 
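+
+To make the LWP/TID discussion above more concrete: the idea behind
+`make_proc`/`inf_tid_to_thread` is a two-way mapping between Mach kernel
+thread ports and locally generated small integer ids.  A sketch of such a
+mapping (the structure and function names are made up; this is not GDB's
+actual code):
+
+    #include <mach.h>
+    #include <stdlib.h>
+
+    struct thread_entry
+    {
+      thread_t port;            /* Mach kernel thread (send right).  */
+      int tid;                  /* Locally assigned id, shown to the user.  */
+      struct thread_entry *next;
+    };
+
+    static struct thread_entry *threads;
+    static int next_tid = 1;
+
+    /* Record PORT and hand out the next free id.  */
+    static int
+    register_thread (thread_t port)
+    {
+      struct thread_entry *e = malloc (sizeof *e);
+
+      if (e == NULL)
+        abort ();
+      e->port = port;
+      e->tid = next_tid++;
+      e->next = threads;
+      threads = e;
+      return e->tid;
+    }
+
+    /* Look the Mach port back up for a given id.  */
+    static thread_t
+    tid_to_port (int tid)
+    {
+      struct thread_entry *e;
+
+      for (e = threads; e != NULL; e = e->next)
+        if (e->tid == tid)
+          return e->port;
+      return MACH_PORT_NULL;
+    }
+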
+ https://paste.debian.net/41066/ this is my result + === gdb Summary === + + # of expected passes 15573 + # of unexpected failures 609 + # of unexpected successes 1 + # of expected failures 31 + # of known failures 57 + # of unresolved testcases 6 + # of untested testcases 47 + # of unsupported tests 189 + /home/hacklu/code/gdb/gdb/testsuite/../../gdb/gdb version 7.6.50.20130619-cvs -nw -nx -data-directory /home/hacklu/code/gdb/gdb/testsuite/../data-directory + + make[3]: *** [check-single] Error 1 + make[3]: Leaving directory `/home/hacklu/code/gdb/gdb/testsuite' + make[2]: *** [check] Error 2 + make[2]: Leaving directory `/home/hacklu/code/gdb/gdb' + make[1]: *** [check-gdb] Error 2 + make[1]: Leaving directory `/home/hacklu/code/gdb' + make: *** [do-check] Error 2 + I got a make error so I don't get the *.sum and *.log file. + Well, that should be fixed then? + hacklu: When does university start again for you? + My university have start a week ago. + but I will fix this, + Oh, OK. So you won't have too much time anymore for GDB/Hurd + work? + it is my duty to finish my work. + time is not the main problem to me, I will shedule it for myself. + hacklu: Thanks! Of course, we'd be very happy if you stay with + us, and continue working on this project (or another one)! :-D + I also thanks all of you who helped me and mentor me to improve + myself. + then, what the next I can do is that fix the testcase failed? + hacklu: It's been our pleasure! + hacklu: A comparison of the GDB testsuite results for a native + and gdbserver run would be good to get an understanding of the current + status. + ok, I will give this comparison soon. BTW,should I compare the + native gdb result with the one before my patch + You mean compare the native run before and after your patch? + Yes, that also wouldn't hurt to do, to show that your patch doesn't + introduce any regressions to the native GDB port. + ok, beside this I should compare the native gdb with gdbserver ? + Yes. + beside this, what I can do more? + No doubt, there will be differences between the native and + gdbserver test runs -- the goal is to reduce these. (This will probably + translate to: implement more stuff for the Hurd port of gdbserver.) + ok, I know it. Start it now + As time permits. :-) + It's ok. :) + + +# IRC, freenode, #hurd, 2013-09-23 + + I have to go out in a few miniutes, will be back at 8pm. I am + sorry to miss the meeting this week, I will finishi my report soon. + tschwinge, youpi ^^ diff --git a/community/gsoc/2013/nlightnfotis.mdwn b/community/gsoc/2013/nlightnfotis.mdwn index 43f9b14c..a9176f51 100644 --- a/community/gsoc/2013/nlightnfotis.mdwn +++ b/community/gsoc/2013/nlightnfotis.mdwn @@ -448,3 +448,2590 @@ License|/fdl]]."]]"""]] nlightnfotis: OK, so probably waiting at the FSF office to be processed. Let's allow for some more time. After all, this is not critical for your progress. + + +# IRC, freenode, #hurd, 2013-07-10 + + tschwinge: I have run the diff of the GCC repo on the Hurd + against the one on my host linux os, and there was nothing relevant to + fixcontext and initcontext that are the ones that fail the + compilation. In any case I did recheck out the branch, and I have + attempted a build with it. It fails at the same point. Now I am + attempting a build with the -w (inhibit warnings) flag enabled + nlightnfotis: Have there been any differences in the diff? + There should be none at all. + tschwinge: there were some small changes due to the repo's + being checked out at different times. 
It was a large diff however. I + inspected it and didn't find anythign that was of much use. Here it is in + case you might want to see it: + https://www.dropbox.com/s/ilgc3skmhst7lpv/diffs_in_git.txt + nlightnfotis: Well, the idea of this exercise precisely was to + use the same Git revisions on both sides of the diff -- to show that + there are no spurious differences -- which can't be shown from your + 124486 lines diff. (Even though indeed there is no difference in + libgo/configure that would explain the mis-match, but who knows what else + might be relevant for that. + Would you please repeat that? + tschwinge: I will do so. It was wrong from me to not diff + against the same revisions, but going through the diff results grepping + for the problematic code didn't yield any results, so I thought that + might not be the issue. + I will perform the diff again tomorrow morning and report on + the results. + nlightnfotis: Anyway, if you checked out again, the latest + revision, and it still fails in exactly the same way, there is something + wrong. + nlightnfotis: And -w won't help, as there is a hard error + involved. + nlightnfotis: Are yous till working on GSoC things today? + tschwinge: yeah I am here. I decided to do the diff today + instead of tomorrow. + It finished now btw + let me tell you + ah and this time, the gits were checked out at the same time + from the same source + and are at the same branch + nlightnfotis: Coulod you upload the + gccbuild/i686-unknown-gnu0.3/libgo/config.log of the build that failed? + tschwinge: sure. give me a minute + tschwinge: there is something strange going on. The two + repos are at the exact same state (or at least should be, and the logs + indicate them to be) but still the diff output is 4.4 mb + but no presence of initcontext of fixcontext + tschwinge: the config.log file --> + http://pastebin.com/bSCW1JfF + wow! I can see several errors in the config.log file + but I am not so sure about their fatality. Config returns 0 + at the end of the log + nlightnfotis: As the configure scripts probe for all kings of + features on all kings of strange systems, it's to be expected that some + of these fail on GNU/Hurd. + What is not expected, however, is: + configure:15046: checking whether setcontext clobbers TLS + variables + [...] + configure:15172: ./conftest + /root/gcc_new/gcc/libgo/configure: line 1740: 1015 Aborted + ./conftest$ac_exeext + Hmm. apt-cache policy libc0.3 + nlightnfotis: ^ + tschwinge: Installed 2.13-39+hurd.3 + Candidate: 2.1-6 + *2.17 + Bummer. + nlightnfotis: As indicated in + + and thereabouts, you need 2.17-3+hurd.4 or later... + Well. + At least that now explains what is going on. + tschwinge: i see. I am in the process of updating my hurd + vm. I saw that libc has also been updated to 2.17 + I will confirm when updating is done + nlightnfotis: Anyway, is the diff between the two repositories + empty now or are there still differences? + there are differences + and they were checked out at the same time + from the same source + (the official git mirror) + and they are both at the same branch + and still diff output is 4.4 MB + but quick grepping into it and there is not mention of + initcontext or fixcontext + That's... unexpected. + may be a mistake I am making + but considering that diff run for some time before + completing + In both Git repositories, »git rev-parse HEAD« shows the same + thing? + Could you please upload the diff again? + tschwinge: confirmed. 
libc is now version 2.17-1 + tschwinge: http://pastebin.com/bSCW1JfF + for the rev-parse give me a second + nlightnfotis: Where is libc0.3 2.17-1 coming from? You need + 2.17-3+hurd.4 or later. + it is 2.17-7+hurd.1 + OK, good. + The URL you just have is the config.log file, not the diff. + s%have%gave + oh my mistake + wait a minute + the two repos have different output to rev-parse + Phew. + That explains. + So the Git branches are at different revisions. + that confused me... when I run git pull -a the branches that + were changed were all updated to the same revision + unless... there were some automatic merges in the *host* GCC + repo required during some pulls + but that was some time ago + would it have messed my local history that much? + that's the only thing that may be different between the two + repos + they checkout from the same source + nlightnfotis: At which revisions are the two + repositories/branches? + I have never used »put pull -a«. What does that do? + tschwinge: from what I know it does an automatic git fetch + followed by git merge. The -a flag must signal to pull all branches (I + think it's possible to pull only one branch) + That's the --all option. -a is something different (that I + don't understand off-hand). + Well, --all means to pull all remotes. + But you just want the GCC upstream, I guess. + I always use git fetch and git merge manually. + oh my god! You are write. -a is equivallent to --append + + https://www.kernel.org/pub/software/scm/git/docs/git-pull.html + git pull must be safe though + + http://stackoverflow.com/questions/292357/whats-the-difference-between-git-pull-and-git-fetch + without the -a + *right + why did I even write "right" as "write" above I don't + even... + what did I write in the sentence above + oh my god... + tschwinge: they are indeed on different revisions: The host + repo's last commit was made by me apparently, to merge master into + tschwinge/t/hurd/go, whereas the last commit of the Hurd repo was by you + and it reverted commit 2eb51ea + and that should also explain the large diff file + with master merged into the tschwinge/t/hurd/go branch + I will purge the debian repo and redownload it + *reclone it + that should bring it to a safe state I suppose. + + +# IRC, freenode, #hurd, 2013-07-11 + + nlightnfotis: how's your build going? + I tried one earlier and it seemed to build without any + issues, something that was...strange. I am repeating the build now, but I + am saving the compilation output this time to study it. + it was strange that the build succeeded? that sounds sad :/ + teythoon: considering that 3 weeks now I failed to build it + without errors, it sure seems weird that it builds without errors now :) + what did you change ? + braunr: not many things apparently. To be honest the change + that seemed to do the trick was (under thomas' guidance) update of libc + from 2.13 to 2.17 + well that can explain + tschwinge: Big update! GCC-go not compiles without errors + under the Hurd. I have done 2 compilations so far, none of which had + issues. Time needed for full build (without bootstrap) is 45 minutes +- 1 + minute. I also run the test suite, and I can confirm your results + s/not/now/, perhaps? + pinotree yeah. I don't know how it came up with not there. I + meant now + tschwinge: link for the go.sum is here --> + https://www.dropbox.com/s/7qze9znhv96t1wj/go.sum + + +# IRC, freenode, #hurd, 2013-07-12 + + nlightnfotis: Great! So you finally reproduced my results. + :-) + tschwinge: Yep! 
I am now building a blog, so that I can move + my reports there, so that they are more detailed, to allow for greater + transparency of my actions + nlightnfotis: Did you recently (in email, I think?) indicate + that there is another Go testsuite, for libgo? + nlightnfotis: As you prefer. + tschwinge: there seemed to be one, at least in linux. I + think I saw one in the Hurd too. + Oh indeed there is a libgo testsuite, too. + as a matter of fact, make check-go + did check for the lib + but lib was failing + yeah + So please have a look at that testsuite's results, too, and + compare to the GNU/Linux ones. + sure. I can do that now. + And for the go.sum you posted, please have a look at the tests + that do not pass (»grep -v ^PASS: < go.sum«), assuming they do pass on + GNU/Linux. + I suggest you add a list of the differences between GNU/Linux + and GNU/Hurd testresults to the wiki page, + , at the end of + the Part I section. + I'm on it. + For now, please ignore any failing tests that have »select« in + their name -- that is, do file them, but do not spend a lot of time + figuring out what might be wrong there. + The Hurd's select implementation is a bit of a beast, and I + don't want you -- at this time -- spend a lot of time on that. We + already know there are some deficiencies, so we should postpone that to + later. + tschwinge: noted. + So what I would like at the moment, is a list of the testresult + differences to GNU/Linux, then from the go.log file any useful + information about the failing test (which perhaps already explains) + what's going wrong, and then a analysis of the failure. + nlightnfotis: I assume you must be really happy that you + finally got it build fine, and reproduced my results. :-) + tschwinge: yeah! I can not hide from you the fact that + failing all those builds made me really nervous about me missing my + schedule. Having finally built that and revisiting my application I can + see I am on schedule, but I have to intensify my work to compensate for + any potential unforeseen obstacles + , in the futute + *future + + +# IRC, freenode, #hurd, 2013-07-15 + + nlightnfotis: btw, do you have a weekly progress report? + youpi: not yet. Will write it shortly and post it here. I + made a new blog to keep track of my progress. + Will report much more frequently now via my blog + did you add your blog url to the hurd iwki? + currently I am running gcc tests on both gcc go and libgo to + see what the differences are with Linux + I believe I have done so, let me see + youpi: gccgo passes most of its tests (it fails a small + number, and I am looking into those tests) but libgo fails 130/131 tests + (on the Hurd that is) + ok + + guys I wrote my report. This time I made it available on my + personal blog. You can find it here: + www.fotiskoutoulakis.com/blog/2013/07/15/gsoc-week-4-report/ As always, + open to (and encouraging) criticism, suggestions, anything that might + help me. + I also have to mention that now that my personal website is + online, I will report much more frequently, to the scale of reporting day + by day, or every 2-3 days. + nlightnfotis: without spending time on select, it'd be good to have + an idea of what is going wrong + eh, go having trouble with select + select is a beast, but we do have fixed things lately and we don't + currently know any issue still pending + youpi: are you suggesting to not skip the select tests too? + select is kind of critical .. 
+ as youpi said, if you can determine what's wrong, at the interface + level (not the implementation), it would be a good thing to do + so we know what's wrong + we're not asking to fix it, though + braunr: youpi: noted. Thanks for the feedback. Is there + something else you might want me to improve? Something with the report + itself? Something you were expecting to see but I failed to provide? + no it's ok + it's short, readable, and readily answers the questions i might + have had so it's good + as you say, now you have to work on the core of your task :) + note: the "select" word in the testsuite is not strictly bound to + the C "select" + so it is probably really worth digging a bit at least on the go + side + but it's really worth doing in the end, as it will probably reveal + some nasty bugs on the way + I appreciate your input. I will start working on it asap + (today) and will report on Wednesday perhaps (or Thursday at worst). + + +# IRC, freenode, #hurd, 2013-07-18 + + braunr: I found out what was causing the fails in the tests + in both libgo and gccgo + it's a assertion: mach_port_t ktid = __mach_thread_self (); + int ok = thread->kernel_thread == ktid; __mach_port_deallocate + ((__mach_task_self_ + 0), ktid); ok; }) + is all that the assertion ? + yes + please paste the code somewhere + or is it in libpthread ? + http://pastebin.com/G2w9d474 + nonblock.x: ./pthread/pt-create.c:167: __pthread_create_internal: Assertion `({ mach_port_t ktid = __mach_thread_self (); int ok = thread->kernel_thread == ktid; __mach_port_deallocate ((__mach_task_self_ + 0), ktid); ok; })' failed. + 9 FAIL: go.test/test/chan/nonblock.go execution, -O2 -g + yes + that's related to my current work on thread destruction + +[[open_issues/libpthread/t/fix_have_kernel_resources]]. + + thread resources recycling is buggy + i suggest you make your own thread pool if you can + I will look into it further and let you know. Thanks for + that. + + +# IRC, freenode, #hurd, 2013-07-22 + + tschwinge, I have found what is failing both libgo and gccgo + tests, but for the life of me, I can not really find the offending code + on any repository. + not even the eglibc-source debian package. it's driving me + insane. + nlightnfotis: If this is driving you insane, we should quickly + have a look at that! + thanks tschwinge: I have found that the offending code is an + assertion: { mach_port_t ktid = __mach_thread_self (); int ok = + thread->kernel_th read == ktid; __mach_port_deallocate ((__mach_task_s + elf_ + 0), ktid); ok; } on a file called pt-create.c under the + libpthread on line 167 + but for the life of me, I can not find that piece of code + anywhere. And when I mean anywhere, I mean anywhere. I have looked for it + on all of the branches of glibc, libpthread and the source code of + eglibc. + that's why if you don't mind I would like to write my report + in a day or two, when (hopefully) I will have more progress to report on. + nlightnfotis: isn't that libpthread/sysdeps/mach/pt-thread-start.c + ? + or rather, ./sysdeps/mach/hurd/pt-sysdep.h + youpi: let me check this out. If that's it I'm gonna cry. + which unfortunately is inlined in a lot of places + nlightnfotis: does the assertion not tell you the file & line? + youpi: holy smokes! That's the code I was looking for! Oh + boy. Yeah the logs do tell me, but it was very misleading. So misleading, + taht I was actually looking at the wrong place. 
All logs suggest that + this piece of code is at libpthread/pthread/pt-create.c in line 167 + what is that line in your tree? + a call to _pthread_self(), isn't it? + then it's not actually misleading, this is indeed where the + pt-sysdep.h definition gets inlined + it seems so, yeah. it's err = __pthread_sigstate + (_pthread_self (), 0, 0, &sigset, 0); + nlightnfotis: and what is the backtrace? + youpi: _pthread_create_internal: Assertion failed. + The assertion is the one above + nlightnfotis: sure, but what is the backtrace? + I don't have the full backtrace. These are the logs from the + compiler. All I can get is: reports like this: nonblock.x: + ./pthread/pt-create.c:167: __pthread_create_internal: Assertion `({ + mach_port_t ktid = __mach_thread_self (); int ok = thread->kernel_thread + == ktid; __mach_port_deallocate ((__mach_task_self_ + 0), ktid); + ok; })' failed. + nlightnfotis: you should probably have a look at running the tests + by hand + so you can run them in a debugger, and get backtraces etc. + nlightnfotis: did i answer that ? + braunr: which one? + the problems you're seeing are the pthread resources leaks i've + been trying to fix lately + they're not only leaks + creation and destruction are buggy + I have read so in + http://www.gnu.org/software/hurd/libpthread.html. I believe it's under + Thread's Death right? + nlightnfotis: yes but it's buggy + and the description doesn't describe the bugs + so we will either have to find a temporary workaround, or + better yet work on a fix, right? + nlightnfotis: i also told you the work around + nlightnfotis: create a thread pool + braunr: since thread creation is also buggy, wouldn't the + thread pool be buggy too? + nlightnfotis: creation *and* destruction is buggy + nlightnfotis: i.e. recycling is buggy + nlightnfotis: the hurd servers aren't affected much because the + worker threads are actually never destroyed on debian (because of a + debian specific patch) + + youpi, nlightnfotis, hacklu_: btw, what about the copyright + assignment process + nlightnfotis just got his on file, so there is progress. + I have email from Donald R Robertson III + about that -- but it is not yet present in the + FSF copyright.list file... + I think I received that email because I was CCed on + nlightnfotis' submission. + tschwinge: I have got the papers, and they were signed by + the FSF. They stated delivery date 11 of July, but the documents were + signed on the 10th of July :P + Ah, no, I received it via hurd-maintainers@gnu.org -- and the + strange thing is that not all assignments that got processed got sent + there... + At the recent GNU Tools Cauldron we also discussed this in the + GCC context; and their experience was the very same. Emails get lost, + and/or take ages to be processed, etc. + It seems the FSF is undermanned. + + +# IRC, freenode, #hurd, 2013-07-27 + + I have one question about the Mach sources: I can see it + uses its own scheduler (more like, initializes) and also does the same + for the linux scheduler. Which one does it use? + it doesn't use the linux scheduler + the linux glue just glues linux scheduling concepts onto the mach + scheduler + ohh I see now. Thanks for that youpi. + + +# IRC, freenode, #hurd, 2013-07-28 + + In the mach kernel source code, does the (void) before a + function call have a semantic meaning, or is it just remnants of the past + (or even documentation) + for example? 
+ pinotree: (void) thread_create (kernel_task, + &startup_thread); + I read on stack overflow that there is only one case where + it has a semantic meaning, most of the times it doesn't + + http://stackoverflow.com/questions/13954517/use-of-void-before-a-function-call + most probably thread_create has a non-void return value, and + this way you're explicitly suppressing its return value (usually because + you don't want/need to care about it) + isn't the value discarded if the (void) is not there? + yes, but depending on extra attributes and/or compiler warning + flags the compiler might warn that the return value is not used while it + ought to + the cast to void should suppress that + oh, okay, thanks for that pinotree + and yes you are right that thread_create actually does + return something + even if there would be no compiler message about that, adding + the explicit cast could mean "yes, i know the function does return + something, but i don't care about it" + ... as hint to other code readers + as a form of documentation then + also + + oh well, I am gonna ask and I hope someone will answer it: + In the Mach's dmesg (/var/log/dmesg) I can see that the version string + along with initial memory mapping information are printed twice, when in + fact they are supposed to be called only once. Is this a bug, or some + buffering error, or are they actually called twice for some reason? + + +# IRC, freenode, #hurd, 2013-07-29 + + guys is the evaluation today? + yes + right + where can we find the evaluation papers on melange? + wait untill 12pm UTC. + yeah, I just noticed thanks hacklu_ + nlightnfotis:) + + tschwinge: I only have one question regarding my project. If + I make some changes to libpthread, what's the best way to test them in + the hurd? Rebuild glibc with the updated libpthread? + NlightNFotis: Yes, you'll have to rebuild glibc. I have a + cheat sheet for that: + http://darnassus.sceen.net/~hurd-web/open_issues/glibc/debian/ + It may be that the »Run debian/rules patch to apply patches« + step is no longer encessary with the 2.17 glibc packages. + thanks for that tschwinge. :) + NlightNFotis: Sure. :-) + + NlightNFotis: Where's your weekly status? + I will write it today at the noon. I have written all the + other ones, and they are available at www.fotiskoutoulakis.com + the next one will be available there as well, later in the + day + Ack. But please try to finish your report before the meeting, + as discussed. + oh, forgive me for that. I thought it was ok to write my + report a day or so later. Sorry. + NlightNFotis: Please write your report as soon as possible -- + otherwise there's no useful way for me to know what your status is. + I will. This week I have been mostly going through the + various sources (the Hurd, Mach and libpthread, especially the last two) + in my attempt to get a better understanding for how libpthread + works. Since yesterday I have attempted some small changes on my + libpthread repo that I plan on testing and reporting on them. That's why + I still have not written my report. + NlightNFotis: Things don't need to be finished before you + report about them. It's often more useful to discuss issues *before* you + spend time on implementing them. + #hurd + NlightNFotis: what kind of changes do you want to add to + libpthread ? + Have a look at the asseriton failure, I would hope. 
:-) + well no + again, i did that + and it's not easy to fix + braunr: I was looking into ways that I could create the + thread pool you suggested into libpthread + no, don't + create it in your application + not in libpthread + well, this may not be an acceptable solution either .. + Before doing that we have to understand what exactly the Go + runtime is doing. It may just be a weird itneraction with the setcontext + et al. functions that I failed to think about when implementing these? + the other possibility is the go runtime libraries. But I + thought that libpthread might be a better idea, since you told me that + creation *and* destruction are buggy + braunr: you are right, the signal thread is always exist. I have + got a wrong understand before. + tschwinge: I can look into that, now. I will also include + that in my report. + NlightNFotis: i don't see how this is a relevant argument .. + tschwinge: i'd suggest he first try with a custom pool in the go + runtime, so we exclude what you're suspecting + if this pool actually works around the issues NlightNFotis is + having, it will confirm the offending problem comes from libpthread + So, as a very first step make any thread + distruction/deallocation a no-op. + yes + braunr: I originally understood that a thread pool might + skip the thread's destruction, so that we escape the buggy part with the + thread's destruction. Since that was a problem with libpthread, it sure + affects other threads (instead of go's ) too. So I assumed that building + the thread pool into libpthread might help eliminate bugs that may affect + other code too. + no, it's not a proper fix + it's a work around + and i'm working on a proper fix in parallel + (when i have the time, that is :/) + oh, I see. So for the time, I had better not touch + libpthread, and take a look at the go run time aye? + NlightNFotis: Remember: one thing after the other. First + identify what is wrong exactly. Then think and discuss how to solve the + very specific issue. Then implement it. + as tschwinge said, make thread destruction a nop in go + see if that helps + NlightNFotis: For example, you surely have noticed (per your + last report), that basically all Go language test pass (aside from the + handful of those testing select, etc.) -- but all those of the libgo + runtime library fail, literally all of them. + You noticed they basically all fail with the same assertion + failure. But why do all the Go language ones work fine? + Don't they execute the program they built, for example? + (I haven't looked.) + they do execute the program. the language ones that fail + too, fail due to the assertion failure + Or, what else is different for them? How are they built, which + flags, how are they invoked. + how many goroutines ? + :p + Do you also get the assertion failure when you built a small Go + program yourself and run that one. + Don't get the assertion failure? Then add some more complex + stuff that are likely to invole adding/re-using new threads, such as + goroutines. + I didn't get the assertion failure on a small test program, + but now that you suggest it it might be a good idea to build a custom + test suite + Etc. That way you'll eventually get an understanding what + triggers the assertion failure. + And that exeactly is the kind of analysis I'd like to read in + your weekly report. + A list of things what you have done, which assuptions you've + made, how that directed your further analysis, what results that gave, + etc. + I will do it. 
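+
+Independent of the Go runtime, the thread creation/destruction recycling
+that braunr suspects can be exercised with a small C churn test (a sketch,
+not something that was actually run in the discussion above):
+
+    #include <pthread.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+
+    static void *
+    worker (void *arg)
+    {
+      return arg;
+    }
+
+    int
+    main (void)
+    {
+      int i;
+
+      for (i = 0; i < 1000; i++)
+        {
+          pthread_t t;
+          int err = pthread_create (&t, NULL, worker, NULL);
+
+          if (err != 0)
+            {
+              fprintf (stderr, "iteration %d: pthread_create: %s\n",
+                       i, strerror (err));
+              exit (1);
+            }
+          /* Joining destroys the thread again, so the next iteration
+             forces libpthread to recycle its resources.  */
+          pthread_join (t, NULL);
+        }
+      puts ("done");
+      return 0;
+    }
+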
I will try to rush to finish it today before + you leave, so that you can inspect it. God I feel like all that time I + spent this week studying the particular source code (libpthread, and the + Mach) were in vain... + on second thoughts, it was not in vain. I got a pretty good + understanding of how these pieces of software work, but now I will have + to do something completely different. + Studying code is never in vain. + Exactly. + You must have had some motivation to study the code, so that + was surely a valid thing to do. + But we'd link to understand your reasoning, so that we can + support you and direct you accordingly. + but it's better to focus on your goals and determine an + appropriate course of actions, usually starting with good analysis + Yes. + s/link/like/? + pinotree: Indeed, thanks. + makes me remember when i implemented radix trees to replace splay + trees, only to realize splay trees were barely used .. + braunr: Yes. It has happened to all of us. ;-P + NlightNFotis: So, don't worry -- but learn from such things. + :-) + anyway, I will start right away with the courses of action + you suggested, and will try to have finished them by noon. Thanks for + your help, it really means a lot. + In software generally, it is never a good idea to let you be + distracted, and don't follow your focus goal, because there are always so + many different things that could be improved/learned/fixed/etc. + tschwinge, I am only nervous about one thing: the fact that + I have not submitted yet any patch or some piece of code in general. Then + again, the summer of code for me so far has been 70-80% reading about + stuff I didn't know about and 30-20% doing the stuff I should know + about... + NlightNFotis: That's why we're here, to teach you something. + Which we're happy to do, but we all need to cooperate for that (and I'm + well aware that this is difficult if one is not in the same rooms, and + I'm also aware that my time is pretty limited). + NlightNFotis: We're also very aware that the Hurd system, as + any operating system project (if you're not just doing "superficial" + things) is difficult, and takes lots of time to learn, and have concepts + and things sink into your brain. + i wouldn't worry too much + We're also still learning every day. + go doesn't require a lot from the underlying system, but what is + required is critical + once you identify it, coding will be quick + tschwinge: braunr: thanks. I shall begin working following + the directions you gave to me. + NlightNFotis: So yes, because Google wants us to grade you + based on that, you'll eventually have to write some code, but for + example, a patch to disable thread distruction/deallocation in libgo + would definitely count as such code. And that seems like one of your + next steps. + tschwinge: i need to deliver that instantly, right? seeing + as the evaluation is today. + NlightNFotis: No. Deliver it when you have something to + deliver. :-) + tschwinge: I am nervous about the evaluation today. I have + not submitted a single piece of code, only some reports. How negatively + does this influence my performance report? + NlightNFotis: If I can say so, in the evaluation today, Google + basically asks us mentors whether we want to fail our students right now. + Which I don'T plan to do, knowing about the complexity of the Hurd + system, and the learning required before you can do useful code changes. + tschwinge: that really means a lot to me, and it got a + weight of my chest. 
+ uh ok, i have to be the rude guy again + NlightNFotis: the gsoc is also a way for the student to prepare + for working in software development communities + whether free software/open source and/or in companies + people involved care a lot less about pathos than actual results + (or to prepare students to be hired by google, but that's + another story) + NlightNFotis: in other words, stop apologizing that much, stop + focusing so much on that, and just work as you can + + +# IRC, freenode, #hurd, 2013-07-31 + + teythoon: both samuel and thomas would be missing for the + week right? + nlightnfotis: they do, why? + nlightnfotis: err, they do?? why? + + +# IRC, freenode, #hurd, 2013-08-01 + + braunr: I checked out what you (and Thomas) suggested and + did some research on go on the Hurd. I have found out that go works, + until you need to use anything that has to do with a goroutine. I am now + playing with the go runtime and checking to see if turning thread + destruction to noop will have any difference. + + +# IRC, freenode, #hurd, 2013-08-05 + + youpi: whenever you have time, I would like to report my + progress as well. + nlightnfotis: sure, go ahead + but again, you should report before the meeting + so we can read it before coming to the discussion + I have written my report + ah + nlightnfotis: I have read your report, these days you have make a + great progress. + where is it? + it was available since yesterday + + http://www.fotiskoutoulakis.com/blog/2013/08/05/gsoc-partial-week-7-report/ + thanks hacklu. The particular piece of code I was studying + was very very interesting :) + nlightnfotis: I think you should show your link in here or email + next time. I have spend a bit more time to find that :) + youpi: for a tldr, at the last time I was told to check + gccgo's runtime for clues regarding the go routine failures. + hacklu: will keep that in mind, thanks. + youpi: thing is, gccgo operates on two different thread + types: G's (the goroutines, lightweight threads that are managed by the + runtime) and M's (the "real" kernel threads") + none of which are really "destroyed" + ok, makes sense + G's are put in a pool of available goroutines when their + status is changed to "Gdead" so that they can be reused + M's also don't seem to go away. There is always at least one + M (the bootstrap one) and all other M's that get created are also stashed + in a pool of available working threads. + you could put some debugging printfs in libpthread, to make sure + whether threads do die or not + I am studying this further as we speak, but they both don't + seem to get "destroyed", so that we can be sure that bugs are triggered + by thread destruction + I was beginning to believe that maybe I was looking in the + wrong direction + but then I looked at my past findings, and I noticed + something else + if you take a look at the first failed go routine, it failed + at the time.sleep function, which puts a goroutine to sleep for ns + nanoseconds. That made me think if it was something that had to do with + the context functions and not the goroutines' creation. + nlightnfotis: that's possible + nlightnfotis: I'd say you can focus on this very simple example: a + mere sleep + that's one of the simplest things a thread scheduler has to do, but + it has to do it right + fixing that should fix a lot of other issues + if I have understood correctly, there is at least one G + (Goroutine) and at least one M (kernel thread) running. 
Sleep does put + that goroutine at a hold, and restarting it might be an issue + talking about thread scheduling ? :) + nlightnfotis: go's runtime doesn't actually destroy kernel threads, + apparently + youpi: yeah, that's what I have understood so far. And it + neither does destroy goroutines. If there was an issue with thread + creation, then I guess it should be triggered in the beginning of the + program too (seeing as both M's and G's are created there) + the fact that it is triggered when a goroutine goes to sleep + makes me suspect the context functions + yes + again I am studying it the last days, in search of + clues. Will keep you all updated. + braunr: I have written my report and it is available here + http://www.fotiskoutoulakis.com/blog/2013/08/05/gsoc-partial-week-7-report/ + If you could read it and tell me if you notice something weird tell me + so. + nlightnfotis: ok + nlightnfotis: quite busy here so don't worry if i suddenly + disappear + nlightnfotis: hum, does go implement its own threads ?? + braunr: yeah. It has 2 threads. Runtime managed (the + goroutines) and "real" (kernel managed) ones. + i mean, does it still use libpthread ? + thing is none of them "disappear" so as to explain the bug + with "thread creation **and** destruction) + it must use libpthread for kernel threads as far as creation + goes. + ok, good + then, it schedules its own threads inside one pthread, right ? + using the pthread as a virtual cpu + yes. It matches kernel threads and runtime threads and runs + the kernel threads in reality + the scheduler decides which goroutine will run on each + kernel thread. + ew + this is pretty much non portable + and you're right to suspect context switching functions + yeah my thought for it was the following: thread creation, + if it was buggy, should be triggered as soon as a program starts, seeing + as at least one kernel thread and at least one go routine starts. My + sleep experiment crashes when the goroutine is put on hold + did you find the code putting on hold ? + I will give you the exact link, wait a moment + braunr: + https://github.com/NlightNFotis/gcc/blob/master/libgo/runtime/time.goc?source=c#L59 + that is the exact location is line 26, which calls the one I + pointed you at + ahah, tsleep + old ghost from the past + nlightnfotis: the real location is probably runtime_park + I will check this out. + + may I ask something non-technical but relevant to summer of + code? + sure + would it be okay if I took the day off tomorrow? + nlightnfotis: ask tschwinge but i guess it's ok + + have you found runtime_park ? + i'm downloading your repository from github but it's slow :/ + braunr: not yet. Grepping through the files didn't produce + any meaningful results and github's search is not working + braunr: there is that strange thing with th gccgo sources, + where I can find a function's declaration but not it's definition. Funny + thing is those functions are not really extern, so I am playing a hide + and seek game, in which I am not always successful. + runtime_park is declared in runtime.h. I have looked nearly + everywhere for it. There is only one last place I have not looked at. + braunr: I found runtime_park. It's here: + https://github.com/NlightNFotis/gcc/blob/master/libgo/runtime/proc.c?source=c#L1372 + + nlightnfotis: Taking the day off is fine. Have fun! + tschwinge: I am still here; Thanks for that tschwinge. 
I + will be for the next half hour or something if you would like to ask me + anything + nlightnfotis: I have no immediate questions (first have to read + your report and discussion in here) -- so feel free to log out and enjoy + the sun outside. :-) + + nlightnfotis, tschwinge: btw, have you seen + http://morsmachine.dk/go-scheduler ? + teythoon: thanks for the link. It's really interesting. + + +# IRC, freenode, #hurd, 2013-08-12 + + teythoon did you manage to build the Hurd successfuly? + ah yes, the Hurd is relatively easy + the libc is hard + debian glibc or hurd upstream libc? + but my build on darnassus was successful + *debian eglibc + well, I rebuilt the debian package with two tweaks + do you build on linux and rsync on hurd or ...? + I built it on Hurd, though I thought about setting up a cross + compiler + I see. The process was build Mach, build Hurd, and then + build glibc and it's ready or it needed more? + no, I never built Mach + I must admit I'm not sure about the "proper" procedure + if I change one of Hurds RPC definitions, I think the proper way + is to rebuild the libc against the new definitions and then the Hurd + but I found no way to do that, so everyone seems to build the + Hurd, install it, build the libc and then rebuild the Hurd again + I see. Thanks for that :) + + tschwinge, I have also written my report! It's available + here + http://www.fotiskoutoulakis.com/blog/2013/08/12/gsoc-week-8-partial-report/ + I can sum it up if you want me to. + nlightnfotis: I already read it! :-D + Oh, I didn't. I read the week 7 one. Let me read week 8. ;-) + ok. I am currently going through the assembly generated for + the sample program I have embedded my report. + the weird thing is that the assembly generated is pretty + much the same for the program with 1 and 2 goroutine functions (with the + obvious difference that the one with 2 goroutine functions has 1 more + goroutine in it's assembly code) + I can not understand why it is that when I have 1 goroutine, + an exception is triggered, but when I am having two (which are 99% + identical) it seems to be executed. + and I do not understand why the exception is triggered when + I manually use a goroutine. + To my understanding so far, there is at least 1 (kernel) + thread created at program startup to run main. The same thread gets + created to run a new goroutine (goroutines get associated with kernel + threads) + and it's obvious from the assembly generated. + go_init_main (the main function for go programs) starts with + a .cfi_startproc + the same piece of code (.cfi_startproc) starts a new kernel + thread (on which a goroutine runs) + nlightnfotis: Re your two-goroutines example: in that case I + assume, you're directly returning from the main function and the program + terminates normally. ;-) + nlightnfotis: Studying the assembly code for this will be too + verbose, too low-level. What we need is a trace of steps that happen + until the error. + tschwinge, that must be it, but it should trigger the bug, + since it still has at least one goroutine (and one is known to trigger + the bug) + nlightnfotis: I guess the program exits before the first + gorouting would be scheduled for execution. + the assembly for the goroutines is identical. You can't tell + one from the other. 
The only change is that it has 2 of these sections + instead of one + actually it's the same for the first one + nlightnfotis: I very much assume that the issue is not due to + the code generated by the Go compiler (which you're seeing in the + assembly code), but rather due to the runtime code in the libgo library. + I didn't think of it this way. + ... that improperly interacts with our libpthread. + so my research should focus on the runtime from now on? + Improperly may well imply that our libpthread is at fault, of + course, as we discussed. + Back to the one-gouroutine case (that shows the assertion + failure). Simple case: one goroutine, plus the "main" thread. + We need to get an understanding of the steps that happen until + the error happens. + As this is a parallel problem, and it is involving "advanced" + things (such as setcontext), I would not trust GDB too much when used on + this code. + I will have to manually step through the source myself, + right? + What I would do, is add printf's (or similar) into the code at + critical points, to get an udnerstanding of what's going on. + Such critical points are: pthread_create, setcontext, + swapcontext. + It sounds like a good idea. Anything else to note? + That way, you can isolate the steps required to trigger the + assertion failure. + For example, it could be something like: makecontext, + swapcontext, pthread_creat, boom. + pthread_create_internal is failing at an assertion. I wonder + what would happen if I remove that assertion. + Not without understanding what the error is, and why it is + happening (which steps lead to it). We don't usually do »voodoo + computing and programming by coincidence«. + tschwinge, I also figured out something. If it is a + libpthread issue, it should also get triggered when a simple C program + creates a thread (assuming _pthread_create is causing the issue) + so maybe I should write a C program to test that + functionality and see if it provides any further clues? + nlightnfotis: That's precile what the goal of »isolate the + steps required to trigger the assertion failure« is about: reduce the big + libgo code to a few function calls required to reproduce the problem. + nlightnfotis: I simple C program just doing pthread_create + evidently does not fail. + nlightnfotis: I assume you have a Go program dynamically linked + to the libgo you build? + yes. To the latest go build from the source (4.9) + *gccgo build from source + removing an assertion is usually extremely bad practice + Then you can just do something like make target-libgo (IIRC) + (or instead: cd i686-pc-gnu/libgo/ && make) to rebuild your changed + libgo, and then re-run the Go program. + the thought of randomly removing assertions shouldn't even reach + your mind ! + braunr: even if it is not permanent, but an experiment? + yes + can you explain to me why? + nlightnfotis: Not without understanding what the + error is, and why it is happening (which steps lead to it). We don't + usually do »voodoo computing and programming by coincidence«. + an assertion exists to make sure something that should *never* + happen never happens + removing it allows such events to silently occur + braunr: that's the theory, yes, to check invariants + i dont' know what you mean by using assertions for "an experiment" + unfortunately some people use assert for error handling :/ + that's wrong + and i dont't remember it to be the case in libpthread + nlightnfotis: can you point the faulting assertion again there + please ? 
+ braunr: sure: Assertion `({ mach_port_t ktid = + __mach_thread_self (); int ok = thread->kernel_thread == ktid; + __mach_port_deallocate ((__mach_task_self + 0), ktid); ok; + })' failed. + so basically, thread->kernel_thread != __mach_thread_self() + this code is run only for num_threads == 1 + but has there been any thread destruction before ? + no. To my understanding kernel threads in the go runtime + never get destroyed (comments seem to support that) + IOW: is it certain the only thread left *is* the main thread ? + hm + intuitively, i'd say this is wrong + i'd say go doesn't destroy threads in most cases, but something in + the go runtime must have done it already + i'm not even sure the main thread still exists + check that + where is the go code you're working on ? + there are 3 files of interest + i'd like the whole sources please + I will find it in a moment + braunr: GCC Git clone, tschwinge/t/hurd/go branch. + it is /libgo/runtime/runtime.h + it is /libgo/runtime/proc.c + tschwinge: thanks + braunr: git://gcc.gnu.org/git/gcc.git + I will provide links on github + nlightnfotis: i sayd the whole sources, why do you insist on + giving me separate files ? + for checking it out quickly + oh I misunderstood that sorry + thought you wanted to check out thread creation and + destruction and that you were interested only in those specific files + tschwinge: is it completely contained there or are there external + libraries ? + braunr: You mean libgo? + tschwinge: possibly + tschwinge, I just made sure that yeah programs are + dynamically linked against the compiler's libgo + libgo.so.3 + does libgo come from gcc sources ? + yeah + ok + go files on gcc sources are split under two directories: go, + which contains the frontend go, and libgo which contains the libraries + and the runtime code + braunr: darnassus:~tschwinge/tmp/gcc/go.build/ is a recent + build, with sources in $PWD/../go/. + braunr: libgo is in i686-unknown-gnu0.3/libgo/.libs/ + so tschwinge to roundup for this week I should print debug + around the "hotspots" and see if I can extract more information about + where the specific problem is triggered right? + nlightnfotis: Yes, for a start. + nlightnfotis: identify the main thread, make sure it doesn't exit + noted. + braunr: do you have an idea about the issue I described + earlier? The one with the 1 goroutine triggering the bug, but the 2 + exiting successfully but with no output? + nlightnfotis: i didn't read + do you have 2 mins to read my report? I describe the issue + something messed up in the context i suppose + nlightnfotis: Uhm, I already explained that issue? + you did ? + tschwinge, I know, don't worry. I am trying to get all the + insight I can get. + you mentioned that the scheduler might have an issue and + that the main thread returns before the goroutines execu + *execute + right? + It is the normal thing for a process to terminate normally when + the main function returns. I would expect Go to behave the same way. + "Now, if we change one of the say functions inside main to a + goroutine, this happens" + how do you change it ? + Or am I confused? + tschwinge: i don't remember exactly + braunr: from say("world") to go say("world") + tschwinge, yeah I get that. What I still have not understood + is what is it specifically about the 2 goroutines that doesn't trigger + the issu when 1 goroutine does. 
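+
+For reference, braunr's reading of the faulting assertion quoted above,
+unpacked into commented C (a simplified paraphrase of that fragment, not
+the literal libpthread source):
+
+    mach_port_t ktid = __mach_thread_self ();  /* The kernel thread that is
+                                                  actually running this code.  */
+    int ok = thread->kernel_thread == ktid;    /* Does it match the thread
+                                                  libpthread has on record?  */
+    __mach_port_deallocate (__mach_task_self (), ktid);  /* Drop the extra
+                                                            send right.  */
+    assert (ok);  /* Only reached when num_threads == 1; it fails when the
+                     one remaining recorded thread (expected to be the main
+                     thread) is not the thread currently executing, e.g.
+                     because the main thread has already exited.  */
+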
+ You said that it might have something to do with the + scheduler; it does seem like a good explanation to me + nlightnfotis: My understanding still is that the goroutinges + don't get executed before the main thread exits. + which scheduler ? + braunr: the runtime (go) scheduler. + tschwinge, Yeah, they don't. But still, with 1 goroutine: + you get into main, attempt to execute it, and bam! With two, it should be + the same, but strangely it seems to exit main without an issue + (attempt to execute the goroutine) + why should it be the same ? + braunr: seeing as one goroutine has problems, I can't see + why two wouldn't. At least one of the two should result in an exception. + nlightnfotis: why ? + nlightnfotis: they do have the problem + they don't run + they just don't run into that assertion, probably because there is + more than one thread + wait a minute. You imply that they fail silently? But still + end up in the same situation + yes + in which case it does look like a go scheduler problem + if I understood it correctly, that assertion fails when it + is only 1 thread? + yes + and since the main thread is always correct, i expect the main + thread has exited + which this happens because the one thread left is *not* the main + thread + (which is a libpthread bug) + but it's a bug we've not seen because we don't have applications + creating threads while exiting + I think I got it now. + try to put something like getchar() in your go program + something that introduces a break + so that the main thread doesn't exit + oh right. Thanks for that. And sorry tschwinge I reread what + you said, it seems I had misinterpreted what you suggested. + braunr: If you're interested: for a Go program triggering the + asserition, I don't see any thread exiting (see + darnassus:~tschwinge/tmp/gcc/a.go, run: cd ~tschwinge/tmp/gcc/go.build/ + && ./a.out) -- but perhaps I've been looking for the wrong things in l_. + File l is without a goroutine. Have to leave now, sorry. + braunr: If you want to rebuild: gcc/gccgo -B gcc -B + i686-unknown-gnu0.3/libgo ../a.go -Li686-unknown-gnu0.3/libgo/.libs + -Wl,-rpath,i686-unknown-gnu0.3/libgo/.libs + tschwinge: no i won't touch anything + but thanks + + +# IRC, freenode, #hurd, 2013-08-19 + + nlightnfotis: how are you going with gcc go? + I was print debugging all the week. + I can tell you I haven't noticed anything weird so far. + But I feel I am close to the solution + I have not written my report yet. + I will write it maximum until wednesday + I hope I will have figured it all out until then + a report is not for writing solutions, but for the progress + yes + it's completely fine to be saying "I've been debugging, not found + anything yet" + results or not, always write your reports on time, so your + mentor(s) know what you are doing + I see. Would you like me to write it right now, or is it + okay to write it a day or two later? + nlightnfotis: FYI. this week my report is not finished. just + state some problem I face now. + nlightnfotis: I'd say better write it now + youpi: Ok I will write it and tell you when I am done with + it. + youpi: here is my partial report describing what my course + of action looked like this + week. 
http://www.fotiskoutoulakis.com/blog/2013/08/19/gsoc-week-9-partial-report/ + of course, I will write in a day or two (hopefully having + figured out the whole situation) an exhaustive report describing + everything I did in detail + youpi: I have written my (partial) report describing how I + went about this week + http://www.fotiskoutoulakis.com/blog/2013/08/19/gsoc-week-9-partial-report/ + nlightnfotis: good, thanks! + youpi: please note that this is not an exhaustive link of my + findings or course of action, it merely acts as an example to demonstrate + the way I think and how I go about every day. + I will write an exhaustive report of everything I did so + far, when I figure out what the issue is, and I feel I am close. + well, you don't need to explain all bits in details + this is fine to show an example of how you went + but please also provide a summary of your other findings + oh okay, I will keep this in mind. :) + + +# IRC, freenode, #hurd, 2013-08-22 + + < nlightnfotis> if I want to rebuild libpthread, I have to embed it into + eglibc's source, then build? + < pinotree> or pick the debian sources, patch libpthread there and rebuild + < nlightnfotis> that's most likely what I am going to do. Thanks pinotree. + < pinotree> yw + < braunr> nlightnfotis: i usually add my patches on top of the debian glibc + ones, yes + < braunr> it requires some tweaking + < braunr> but it's probably the easiest way + < nlightnfotis> braunr: I was studying my issues with gcc, and everyday I + was getting more and more confident it must be a libpthread issue + < nlightnfotis> and I figured out, that I might wanna play with libpthread + this time + < braunr> it probably is but + < braunr> i'm not so sure you should dive there + < nlightnfotis> why not? + < braunr> because it can be worked around in go + < braunr> i had a test for you last time + < braunr> do you remember what it was ? + < nlightnfotis> nope :/ care to remind it? + < braunr> iirc, it was running the go test you did but with an additional + instruction in the main function, that pauses + < braunr> something like getchar() in c + < braunr> to make sure main doesn't exit while the goroutines are still + running + < braunr> i'm almost positive that the bug you're seeing is main returning + and libpthread beleiving it's acting on the main thread because there is + only one left + < nlightnfotis> oh that's easy, I can do it now. But it's probably what + thomas had suggested: go routines may not be running at all. + < braunr> they probably aren't + < braunr> and that's a context bug + < braunr> not a libpthread bug + < braunr> and that's what you should focus on + < braunr> the libpthread bug is minor + < nlightnfotis> which is strange, because I had studied the assembly code + and it the code for the goroutine was there + < nlightnfotis> anyway I will proceed with what you suggested + < braunr> yes please + < braunr> that's becoming important + < nlightnfotis> would you mind me dumping some of my findings for you to + evaluate/ post on opinion on? + < braunr> no + < braunr> please do so + < nlightnfotis> I have found that the go runtime starts with a total number + of threads == 1 + < braunr> nlightnfotis: as all processes + < nlightnfotis> I would guess that's because of using fork () + < nlightnfotis> oh so it's ok + < braunr> there always is a main thread + < braunr> even for non-threaded applications + < nlightnfotis> yeah, that I know. The runtime proceeds to create + immediately one more. 
+ < braunr> then it's 2 + < nlightnfotis> and that's ok, it doesn't have an issue with that + < nlightnfotis> yep + < nlightnfotis> the issue begins when it tries to create the 3rd one + < braunr> hum + < braunr> from what i remember + < nlightnfotis> it happily goes through the go runtime's kernel thread + allocation function (runtime_newm()) + < braunr> you also had an issue with the first goroutine + < nlightnfotis> that's with 1 go routine + < braunr> ok + < braunr> so 1 goroutine == 3 threads + < nlightnfotis> it seems so yes. + < braunr> depending on how the go scheduler is able to assign goroutines to + kernel threads i suppose + < nlightnfotis> mind you, (disclaimer: I am not so sure about that) that go + must be using one extra thread for the runtime scheduler and garbage + collector + < braunr> that's ok + < nlightnfotis> so that's where the two come from + < braunr> and expected from a modern runtime + < nlightnfotis> the third must be the go routime + < nlightnfotis> routine + < braunr> hum have to go + < braunr> brb in a few minutes + < braunr> keep posting + < nlightnfotis> it's ok take your time + < nlightnfotis> I will be here + < braunr> but i may not ;p + < braunr> in fact i will not + < braunr> i have like 15 mins ;) + < braunr> nlightnfotis: ^ + < nlightnfotis> I am trying what you told me to do with go + < nlightnfotis> it's ok if you have to go, I will continue investigating + and be back tomorrow + < braunr> ok + < nlightnfotis> braunr: I tried what you asked me to do, both we waiting to + read a string from stdin and with waiting to read an int from stdin + < nlightnfotis> it never waits, it still aborts with the assertion failure + < nlightnfotis> both with one and two go routines + < nlightnfotis> dumping it here just for the log, running the same code + without waiting for input results in two threads created (1 for main and + 1 for runtime, most likely) and "normal" execution. + < nlightnfotis> normal as in no assertion failure, + < nlightnfotis> it seems to skip the goroutines altogether + + +# IRC, freenode, #hurd, 2013-08-23 + + < braunr> nlightnfotis: can i see your last go test code please ? the one + with the read at the end of main + < nlightnfotis> braunr sure + < nlightnfotis> sorry I had gone to the toilet, now I am back + < nlightnfotis> I will send it right now + < nlightnfotis> braunr: http://pastebin.com/DVg3FipE + < nlightnfotis> it crashes when it attempts to create the 3rd thread (the + 1st goroutine), with the assertion fail + < nlightnfotis> if you remove the Scanf it will not fail, return 0, but + only create 2 threads (skip the goroutines alltogether) + < braunr> can you add a print right before main exits please ? + < braunr> so we know when it does + < nlightnfotis> doing it now + < nlightnfotis> braunr: If I enter a print statement right before main + exits, the assertion failure is triggered. If I remove it, it still runs + and creates only 2 threads. + < braunr> i don't understand + < braunr> 14:42 < nlightnfotis> it crashes when it attempts to create the + 3rd thread (the 1st goroutine), with the assertion fail + < braunr> why don't you get that ? + < nlightnfotis> This seems like having to do with the runtime. I mean, I + have seen the emitted assembly from the compiler, and the goroutines are + there. Something in the runtime must be skipping them + < braunr> context switching seems buggy + < nlightnfotis> if it's only goroutines in main + < nlightnfotis> if there's also something else in main, the assertion + failure is triggered. 
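
The race being discussed here, main returning before the new threads ever
get to run, is easy to reproduce with plain pthreads.  A minimal C analogy
of the getchar() suggestion; the program is purely illustrative and assumes
nothing about the go runtime (build with -lpthread):

    /* If main() returns, process exit tears down all threads, so the worker
       may never run; blocking main (here with getchar()) removes the race.  */
    #include <pthread.h>
    #include <stdio.h>

    static void *
    worker (void *arg)
    {
      (void) arg;
      printf ("worker ran\n");
      return NULL;
    }

    int
    main (void)
    {
      pthread_t tid;

      pthread_create (&tid, NULL, worker, NULL);
      getchar ();   /* comment this out and "worker ran" may never appear */
      return 0;
    }

Note that in the session above the pause was never reached: the abort
happens while the first goroutine's kernel thread is being created.
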
+ < braunr> i want you to add a printf right before main exits, from the code + you pasted + < nlightnfotis> I did. It acts the same as before. + < braunr> do you see that last printf ? + < nlightnfotis> no. It aborts before that + < nlightnfotis> :q + < braunr> find a way to make sure the output buffer is flushed + < braunr> i don't know how it's done in go + < nlightnfotis> mistype the :q, was supposed to do it vim + < nlightnfotis> braunr will do right away + < nlightnfotis> there is one thing I still can not understand: Why is it + that two threads are ok, but when the next is going to get created, the + assertion is triggered. + < braunr> nlightnfotis: the assertion is triggered because a thread is + being created while there is only one thread left, and this thread isn't + the main thread + < braunr> so basically, the main thread has exited, and another (the last + one) is trying to create one + < nlightnfotis> the other one might be the runtime I guess. Let me check + out quickly what you suggested + < braunr> the main thread shouldn't exit at all + < braunr> so something with context switching is wrong + < nlightnfotis> the thing is: it doesn't seem to exit when this happens. My + debug statements (in the runtime) suggest that there are at least 2 + threads active, kernel threads don't get destroyed in gccgo + < braunr> 14:52 < braunr> so something with context switching is wrong + < braunr> how well have the context switching functions been tested ? + < nlightnfotis> to be honest I have not tested them; up until this point I + trusted they worked. Should I also take a look at them? + < braunr> how can you trust them ? + < braunr> they've never been used .. + < braunr> thomas added them recently if i'm right + < braunr> nothing has been using them except go + < braunr> piece of advice: don't trust anything + < nlightnfotis> I think they were in before, and thomas recently patched + them! + < braunr> they were in, but didn't work + < braunr> (if i'm right) + < braunr> nlightnfotis: you could patch libpthread to monitor the number of + threads + < braunr> or the go runtime, idk + < nlightnfotis> I have done so on the go runtime + < nlightnfotis> that's where I am getting the number of threads I + report. That's straight out from the scheduler's count. + < braunr> threads can exit by calling pthread_exit() or returning from the + thread routine + < braunr> make sure you catch both + < braunr> also check for pthread_cancel(), although i don't expect any in + go + < nlightnfotis> braunr: Should I really do that? I mean, from what I can + see in gccgo's comments, Kernel threads (m) never go away. They are added + to a pool of m's waiting for work if there is no goroutine running on + them + < nlightnfotis> I mean, I am not so sure they exit at all + < braunr> be sure + < braunr> point me the code please + < nlightnfotis> + https://github.com/NlightNFotis/gcc/blob/master/libgo/runtime/proc.c#L224 + < nlightnfotis> this is where it get's stated that m's never go away + < nlightnfotis> and at line 257 you can see the pool + < nlightnfotis> and wait for me to find the code that actually releases an + and places into the pool + < nlightnfotis> yep found it + < nlightnfotis> line 817 mput + < nlightnfotis> puts a kernel thread given as parameter to the pool + < nlightnfotis> another proof of the theory is at line 1177. It states: + "This point is never reached, because scheduler does not release os + threads at the moment." 
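
One generic way to catch both exit paths mentioned here (returning from the
thread routine and pthread_exit(), and cancellation as well) is a
thread-specific key with a destructor, which runs whenever a thread that set
the key terminates.  A sketch only; the helper names are made up and this is
not the trace that was actually added to the go runtime:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_key_t exit_trace_key;

    /* Called automatically when a watched thread terminates, whether it
       returned, called pthread_exit() or was cancelled.  */
    static void
    thread_exit_trace (void *arg)
    {
      fprintf (stderr, "thread exiting: %s\n", (const char *) arg);
    }

    static void
    init_exit_trace (void)                   /* call once, e.g. from main */
    {
      pthread_key_create (&exit_trace_key, thread_exit_trace);
    }

    static void
    watch_current_thread (const char *name)  /* call once in each new thread */
    {
      pthread_setspecific (exit_trace_key, name);
    }

Caveat: when the whole process exits, remaining threads are torn down
without running key destructors, so a missing trace at process exit does not
by itself prove that no thread exited earlier.
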
+ < braunr> fetching git repository, bit busy, i'll have a look in 5-10 mins + < nlightnfotis> oh it's ok, I had pointed you to the file directly on + github to check it out instantly, but never mind, the file is + /libgo/runtime/proc.c + < braunr> damn github is so slow .. + < braunr> nlightnfotis: i much prefer my own text interface :) + < nlightnfotis> braunr: just out of curiosity what's your setup? I use vim + mainly (not that I am a vim expert or anything, I only know the basics, + but I love it) + < braunr> same + < braunr> nlightnfotis: add a trace at that comment to make SURE threads do + not exit + < braunr> you *cannot* get the libpthread assertion with more than 1 thread + < braunr> grep for pthread_exit() too + < nlightnfotis> will do it now. It will take about an hour to compile + though. + < braunr> i don't understand the stack trick at the start of runtime_mstart + < braunr> ah splitstack .. + < nlightnfotis> I think I should try cross compiling gcc, and then move + files on the hurd. It would be so much faster I believe. + < braunr> than what ? + < nlightnfotis> building gcc on the hurd + < nlightnfotis> I remember it taking about 10minutes with make -j4 on the + host + < nlightnfotis> it takes 45-50 minutes on the vm (kvm enabled) + < braunr> but you can merely rebuild the files you've changed + < nlightnfotis> I feel stupid now... + < braunr> nlightnfotis: have you tried setting GOMAXPROCS to 1 ? + < nlightnfotis> not really, but from what I know GOMAXPROCS defaults to 1 + if not set + < braunr> again, check that + < braunr> take the habit of checking things + < nlightnfotis> braunr: yeah sorry for that. I have checked these things + out before they don't come out of my head I just don't remember exactly + where I had seen this + < braunr> what you can also do is use gdb to catch the assertion and check + the number of threads at that time, as well as the number of threads as + seen by libpthread + < nlightnfotis> braunr: line 492 file proc.c: runtime_gomaxprocs = 1; + < braunr> also see runtime.LockOSThread + < braunr> to make sure the main thread is locked to its own pthread + < nlightnfotis> I can see in line 529 of the same file that the first + thread is getting locked + < nlightnfotis> the new threads that get initialised are non main threads + < braunr> if(!runtime_sched.lockmain) runtime_UnlockOSThread(); + < braunr> i'm suggesting you set runtime_sched.lockmain + < braunr> so it remains true for the whole execution + < braunr> this code looks like a revamp of plan9 lol + < nlightnfotis> it is + < nlightnfotis> in the paper from Ian Lance Taylor describing gccgo he + states somewhere that the original go compilers (the 3gs) are a modified + version of plan9's C compiler, and that gccgo tries to follow them + < nlightnfotis> they differ in a lot of ways though + < nlightnfotis> the 3gs generate a lot of code during link time + < nlightnfotis> gccgo follows the standard gcc procedures + < braunr> eh :D + < nlightnfotis> go -> gogo -> generic -> gimple -> rtl -> object + < nlightnfotis> that's how it flows as far as I recall + < nlightnfotis> gogo is an internal representation of go's structure inside + the gccgo frontend + < nlightnfotis> that's why you see many functions with gogo in their name + < nlightnfotis> I just revisited the paper: gogo is there to make it easy + to implement whatever analysis might seem desirable. It mirrors however + the Go source code read from the input files + < braunr> nlightnfotis: what are you trying now ? 
+ < nlightnfotis> I am basically studying the runtime's source code while + waiting for gccgo to compile on the Hurd + < nlightnfotis> yes I did the stupid whole recompilation again. :/ + < braunr> nlightnfotis: compile for what ? + < braunr> what test ? + < nlightnfotis> to check out to see if M's really are added to the pool + instead of getting deleted + < braunr> nlightnfotis: but how ? + < nlightnfotis> braunr: I have added a statement in mput if we get there + first, and secondly the number of threads that the runtime scheduler + knows that are waiting (are in the pool of m's waiting for work) + < braunr> ok + < braunr> when you can, i'd really like you to do this test : + < braunr> 15:55 < braunr> what you can also do is use gdb to catch the + assertion and check the number of threads at that time, as well as the + number of threads as seen by libpthread + < nlightnfotis> the number of threads required by libpthread is gonna need + me to recompile the whole eglibc right? + < braunr> no + < braunr> just print it with gdb + < nlightnfotis> oh, ok + < braunr> it's __pthread_num_threads + < nlightnfotis> is gdb reliable? I remember thomas telling me that I can't + trust gdb at this point in time + < braunr> and also __pthread_total + < braunr> really ? + < braunr> i don't see why not :/ + < braunr> youpi: any idea about what nlightnfotis is speaking of ? + < nlightnfotis> I may have misunderstood it; don't take it by heart + < nlightnfotis> I don't wanna put words in other people's mouths because I + misunderstood something + < braunr> sure + < braunr> that's my habit to check things + < youpi> braunr: nope + < braunr> youpi: and am i right when i say we don't use context functions + on the hurd, and they're likely to be incomplete, even with the recent + changes from thomas ? + < braunr> (mcontext, ucontext) + < nlightnfotis> braunr: this is what had been said: 08:46:30< tschwinge> As + this is a parallel problem, and it is involving "advanced" things (such + as setcontext), I would not trust GDB too much when used on this code. + < pinotree> if thomas' changes were complete and polished, i guess he would + have sent them upstream already + < braunr> i see but + < braunr> you can normally trust gdb for global variables + < nlightnfotis> Didn't post it as an objection; I posted it because I felt + bad putting the wrong words on other people's mouths, as I said + before. So I posted his original comment which was more authoritative + than my interpretation of it + < braunr> i wonder if there is a tunable to strictly map one thread to one + goroutine + < braunr> nlightnfotis: more focus on the work, less on the rest please + < nlightnfotis> Did I do something wrong? + < braunr> you waste too much time apologizing + < braunr> for no reason + < braunr> nlightnfotis: i suppose you don't use splitstack, right ? + < nlightnfotis> no I didn't + < nlightnfotis> and here's something interesting: The code I just added, in + mput, to see if threads are added in the pool. It's not there, no matter + what I run + < nlightnfotis> So it seems that we the runtime is not reaching mput. + < nlightnfotis> Could this be normal behavior? I mean, on process + termination just release the resources so mput is skipped? + < braunr> i don't know the code well enough to answer that + < braunr> check closer to the lower interface + + +# IRC, freenode, #hurd, 2013-08-25 + + < nlightnfotis> braunr: what is initcontext supposed to be doing? 
+ < braunr> nlightnfotis: didn't look + < braunr> i'll take a look later + < nlightnfotis> braunr: I am buffled by it. It seems to be doing nothing on + the Hurd branch and nothing in the Linux branch either. Why call a + function that does nothing? (it doesn't only seem to do nothing, I have + confirmed it) + < nlightnfotis> youpi: I was wondering if you could explain me + something. What is the initcontext function supposed to be doing? + < youpi> you mean initcontext ? + < nlightnfotis> yes + < youpi> ergl + < youpi> you mean makecontext? + < nlightnfotis> no initcontext. I am faced with this in the goruntime. It's + called in it, but it is doing nothing. Neither in the Hurd tree, nor in + the Linux one + < youpi> I don't know what initcontext is + < youpi> where do you read it? + < nlightnfotis> youpi: let me show you + < nlightnfotis> + https://github.com/NlightNFotis/gcc/blob/fotisk/goruntime_hurd/libgo/runtime/proc.c#L80 + < nlightnfotis> and it is called in quite a few places + < youpi> it's not doing nothing, see other implementations + < pinotree> if SETCONTEXT_CLOBBERS_TLS is not defined, initcontext and + fixcontext do nothing + < pinotree> otherwise (presuming if setcontext clobbers tls) there are two + implementations for solaris/x86_64 and netbsd + < youpi> I don't think we have the tls clobber bug + < youpi> so these functions being empty is completely fine + < nlightnfotis> pinotree: oh, you mean it's used as a workaround for these + two systems only? + < youpi> yes + < pinotree> yes + < nlightnfotis> That makes sense. Thanks both of you for the help :) + < nlightnfotis> youpi: if this counts as some progress, I have traced the + exact bootstrapping sequence of a new go process. I know a good deal of + what is done from it's spawn to it's end. There are some things I wanna + sort out, and later tonight I will write my report for it to be ready for + tomorrow. + < youpi> good + + +# IRC, freenode, #hurd, 2013-08-26 + + < nlightnfotis> Hi everyone, my report is here + http://www.fotiskoutoulakis.com/blog/2013/08/26/gsoc-week-10-report/ + < youpi> nlightnfotis: you should clearly put printfs inside libpthread + < youpi> to check what is happening with the ktids + < nlightnfotis> youpi: yep, that's my next course of action. I just want to + spend some more time in the go runtime to make sure that I understand the + flow perfectly, and to make sure that it is not the runtime's fault + < braunr> nlightnfotis: did you try gdb to print the number of threads ? + < youpi> nlightnfotis: to build it, the easiest way is to start building + eglibc, and when you see it compiling C files (i.e. run i486-gnu-gcc-4.7 + etc.) + < youpi> stop it + < youpi> and go into build/hurd-i386-libc, and run "make others" from there + < nlightnfotis> braunr: that was my plan for today or tomorrow :) + < braunr> start building *debian* glibc + < youpi> there's perhaps some way to only build libpthread, but I don't + remember + < braunr> nlightnfotis: ok + < braunr> youpi: i suggested he tried gdb first + < youpi> why not + < braunr> if you need quick glibc builds, you can use darnassus + < nlightnfotis> braunr: how much time on average should I expect it to + take? + < youpi> it highly depends on the machine + < youpi> it can be hours + < youpi> or a few minutes + < youpi> depending you already have a built tree, a fast disk, etc. 
+ < braunr> make lib others on darnassus takes around 30 minutes + < braunr> a complete dpkg-buildpackage from fresh sources takes 5-6 hours + < braunr> make others from a built tree is very quick + < braunr> a few minutes at most + < braunr> nlightnfotis: i don't see any trace of thread exiting in your + report, is that normal ? + < nlightnfotis> yeah, I guess, since they don't exit prematurely, they are + released along with other resources at the process' exit + < braunr> i'll rephrase + < braunr> you said last time that you saw a function never got called + < braunr> i assumed it was because a thread exited prematurely + < nlightnfotis> oh I sorted it out with the help of youpi and pinotree + yesterday + < braunr> that's different + < braunr> i'm not talking about the function that does nothing + < braunr> i'm talking about the one never called + < nlightnfotis> oh, go on then, + < braunr> i don't remember its name + < braunr> anyway + < nlightnfotis> abort()? + < braunr> i hope abort doesn't get called :) + < nlightnfotis> it doesn't + < braunr> i thought it was the one right before + < braunr> what i mean is + < nlightnfotis> oh runtime_mstart, it does get called + < braunr> add traces at thread exit points + < nlightnfotis> I sorted it out too + < braunr> make *sure* threads don't exit + < nlightnfotis> it get's called to start the kernel thread created at + process spawn at the runtime_schedinit + < braunr> if they really don't, it's probably a context/tls issue + < nlightnfotis> I will do this right now. + < nlightnfotis> braunr: if it's a context/tls issue it's libpthread's + problem? + + +# IRC, freenode, #hurd, 2013-09-02 + + Hello! My report for this week is online: + http://www.fotiskoutoulakis.com/blog/2013/09/02/gsoc-week-11-report/ + nlightnfotis: there always is a signal thread in every hurd + program + nlightnfotis: i also pointed out that there are two variables + involved in counting threads in libpthread, the other one being + __pthread_num_threads + again, more attention to work and details, less showmanship + i'm tired of repeating it + nlightnfotis: doesn't backtrace work in gdb to tell you what + 0x01da48ec is? + also, do you have libc0.3-dbg installed? + braunr: __pthread_num_threads reports is 4. + then why isn't it in your report ? + it's acceptable that you overlook it + and youpi: yeah I have got the backtrace, but 0x01da48ec is + ?? () from /lib/i386-gnu/libc.so.3 + it's NOT when someone else has previously mentioned it to you + nlightnfotis: only that line, no other line? + it has 8 more youpi, the one after ?? is mach_msg () + form/lib/gni386-gnu/libc.so.0.3 + yes mach_msg + almost everything ends up in mach_msg + you should probably pastebin somewhere the output of thread apply + all bt + what's before that ? + braunr: I don't know how I even missed it. 
I skimmed through + the code and only found __pthread_total and assumed that it was the total + number of threads + nlightnfotis: i don't know either + take notes + before mach_msg ins __pthread_timedblock () from + /lib/i386-gnu/libpthread.so.0.3 + I will add it to pastebin in a second + i find it very disappointing that after several weeks blocking on + this, despite all the pointers you've been given, you still haven't made + enough progress to reach the context switching functions + last week, most progress was made when we talked together + then nothing + it seems that you disappear, apparently searching on your own + but for far too long + braunr: I do search on my own, yes, + almost like exploiting being blocked not to make progress on + purpose ... + but too much + braunr: I am not doing this on purpose, I believe you are + unfair to me. I am trying to make as much progress as I can alone, and + reach out only when I can't do much more alone + then why is it only now that we get replies to questions such as + "how much is __pthread_num_threads" ? + why do you stop discussions for almost a week, just to find + yourself blocked again ? + I was working on gcc, going through the runtime making sure + about assumptions and going through various other goroutine or not + programs through gdb + that doesn't take a week + clearly not + last time we talked was + 10:40 < nlightnfotis> braunr: if it's a context/tls issue it's + libpthread's problem? + it did for me... honestly, what is it you believe I am doing + wrong? I too am frustrated by my lack of progress, but I am doing my best + august 26 + yeah, I wanted to make sure about certain assumptions on the + gcc side. I don't want to start hacking on libpthread only to see that it + might have been something I msissed on the gcc side + i told you + it's probably not a libpthread issue + the assertion is + but it's minor + it's not the realy problem, only a side effect + i told you about __pthread_num_threads, why didn't you look at it + ? + i told you about context switching functions, why nothing about it + ? + doing a few printfs to check numbers and using gdb to check them + at break points should be quick + when we talk,ed we had the results in a few minutes + yeah, because I was guided, and that helped me target my + research. On my own things are quite different. I find out something + about gcc's behavior, then find out I need tons more information, and I + have a lot of things that I need to research to confirm any assumptions + from my side + how did you miss the signal thread ? + we even talked about it right here with hacklu + i'll say it again + if blocked more than one day, ask for help + 2 days minimum each time is just too long + I'm sorry. I will be online every day from now on and report + every 10 minutes, on my course of actions. + I recognise that time is off the essence at this point in + time + it's also NO + NO + *SIGH* + nlightnfotis: calm down. braunr just want to help you solve + problem quickly. + 10 minutes is the other extreme + nlightnfotis: in my experiecence, if something block me, I will + keep asking him until I solve the problem. 
+ it's also very frustrating to see you answer questions quickly + when you're here, then wait days for unanswered questions that could have + taken little time if you kept being here + this just gives the impression that you're doing something else in + parallel that keeps you busy + and comfort me in believing you're not being serious enough + aboutit + yeah, I understand that it gives that impression. The only + thing I can tell you now, is that I am *not* doing something else in + parallel. I am only trying to demonstrate some progress alone, and when + working alone things for me take quite some more time than when I am + guided + hacklu: i'm actually the nervous one here + braunr: ok, I understand I have dissapointed you. What would + you suggest me to do from now on? + braunr: :) + manage your time correctly or you'll fail + i'm not the main mentor of this project so it's not for me to + decide + but if i were, and if i had to wait again for several days before + any notice of progress or blocking, i wouldn't even wait for the end of + the gsoc + you're confronted with difficult issues + tls, context switching, thread + ing + they're all complicated + unless you're very experienced and/or gifted, don't assume you can + solve it on your own + and the biggest concern for me is that it's not even the main + focus of your project + you should be working on go + on porting + any side issues should be solved as quickly as possible + and we're now in september ... + go is working quite alright. It's goroutines that have + issues. + nlightnfotis: same thing + goroutines are part of go as far as i'm concerned + and they're working too, something in the hurd isn't + so it's a side issue + you're very much entitled to ask as much help as you need for side + issues + and i strongly feel you didn't + yeah, you're right. I failed on that aspect, mainly because + of the way I work. I wanted to show some progress on my own, and not be + here and spam all day. I felt that spamming questions all day would + demonstrate incompetence from my side + and I wanted to show that I am capable of solving my + problems on my own. + well, in a sense it does, but that's not the skills we were + expecting from you so it's perfectly ok + nlightnfotis: no development group, even in companies, in their + right mind, would expect you to grasp the low level dark details of an + operating system implementation in a few weeks ... + braunr: ok, may I ask what you suggest to me that my next + course of action is? + let me see + nlightnfotis: your report mentions runtime_malg + yes, I runtime malg always returns a new goroutine + nlightnfotis: what's the problem ? + a new m created is assigned a new goroutine via runtime_malg + what happens to that goroutine? Is it destroyed? Because it + seems to be a bogus goroutine. Why isn't the kernel thread instantly + picking the one goroutine available at the global goroutine pool? + let's see if it's that hard to figure out + seeing as m's and g's have a 1:1 (in gccgo) relationship, + and a new kernel thread is created everytime there is a new goroutine + there to run. + are you sure about that 1:1 relationship ? + i hardly doubt it + highly* + yeah, that's what I thought too, but then again, my research + so far shows that when a new goroutine is created, a new kernel thread + creation follows suit + what I have mentioned of course, happens in runtime_newm + nlightnfotis: that's when you create a new m, not a new g + yes, a new m is created when you create a new g. 
My issue is + that during m's creation, a new (bogus) g is created and assigned to the + m. I am looking into what happens to that. + nlightnfotis: "a new m is created when you create a new g", can + you point me to the code ? + braunr: matchmg line 1280 or close to that. Creates new m's + to run new g's up to (mcpumax) + "Kick off new m's as needed (up to mcpumax)." + so basically you have at most mcpumax m + yeah. but for a small number of goroutines (as for example + in my experiments), a new m is created in order to run a new g. + runtime_newm is called only if mget(gp)) == nil + be rigorous please + when i ask + 11:01 < braunr> are you sure about that 1:1 relationship ? + this conclusively proves it's *false* + so don't answer yes to that + it's true for a small number of goroutines, ok + and at startup + because then, mget returns an existing m + nlightnfotis: this g0 goroutine is described in the struct as + G runtime_g0; // idle goroutine for m0 + runtime_malg builds it with just a stack + apparently, that's the goroutine an m runs when there are no g + left + so yes, the idle one + it's not bogus + I thought m0 and g0 where the bootstrap m and g for the + scheduler. + *correction: runtime_m0 and runtime_g0 + hm i got a bit fast + G* g0; // goroutine with scheduling stack + braunr: scheduling stack with stacksize = -1? + unless it's not used as a parameter + let me investigate that + yeah now that I am seeing it, it might make sense, if it + using a default stack size, #defined as StackMin + g0 looks like a placeholder + i think it's used to reuse switching code when there is only one + goroutine involved + e.g. when starting + anyway i don't think we should waste too much time with it + nlightnfotis: try to make a real 1:1 mapping + that's something else i suggested last time + braunr: ok. Where do you suspect the problem lies? + context switching + inside the goruntime? + in glibc + try to use runtime.LockOSThread + http://code.google.com/p/go-wiki/wiki/LockOSThread + nlightnfotis: http://golang.org/pkg/runtime/ is probably better + what exactly do you mean by `use runtime.LockOSThread`? + LockOSThread locks the very first m and goroutine as the main threads + during process initialisation + in proc.c line 565 or something + i'm not sure it will help, because the problem is likely to occur + before even switching to the goroutine that locks its m, but worth trying + 11:28 < braunr> nlightnfotis: http://golang.org/pkg/runtime/ is + probably better + the first example is specific to GUIs that have requirements on + the main thread + whereas i want every goroutine to run in its own thread + I have also noticed that some context switching happens in + the goruntime even with a low number of goroutines and kernel threads + that's expected + goroutines must be viewed as works, and ms as worker threads + everytime a goroutine sleeps, its m should be switching to useful + work + nlightnfotis: i'd make prints (probably using mach_print) of + contexts when saved and restored + and try to see if it makes any sense + that's not simple to setup but not overly complicated either + don't hesitate to ask for help + from inside glibc, right? + yes + well + no from go + don't touch glibc from now + put these prints near calls to makecontext/swapcontext + and setcontext/getcontext + wel + you'll be using getcontext i think + noted it all. 
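
Stripped of the go specifics, the tracing asked for here looks roughly like
the following: print something stack-related (the frame address is close
enough to the stack pointer) around the ucontext calls and check whether
execution really moves to a different stack.  A standalone sketch, not code
from the go runtime:

    #include <stdio.h>
    #include <ucontext.h>

    static ucontext_t main_ctx, co_ctx;

    static void
    coroutine (void)
    {
      /* Should print an address inside `stack' below, far from main's.  */
      printf ("in coroutine, frame address %p\n", __builtin_frame_address (0));
      swapcontext (&co_ctx, &main_ctx);
    }

    int
    main (void)
    {
      static char stack[64 * 1024];   /* separate stack for the context */

      printf ("in main, frame address %p\n", __builtin_frame_address (0));

      getcontext (&co_ctx);
      co_ctx.uc_stack.ss_sp = stack;
      co_ctx.uc_stack.ss_size = sizeof stack;
      co_ctx.uc_link = &main_ctx;
      makecontext (&co_ctx, coroutine, 0);

      swapcontext (&main_ctx, &co_ctx);   /* run coroutine on its own stack */
      printf ("back in main\n");
      return 0;
    }

That stack switch is exactly what the threadvars discussion further down
(2013-09-04) identifies as the problem on the Hurd.
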
I also have the gdb output you asked me for + http://pastebin.com/LdnMQDh1 + i don't see main + some notes first: The main thread is the one with id 4, and + the output on the top is its backtrace. + and main.main is run in thread 6 + Remember that main when it comes to go is in the file + go-main.c + so main becomes runtime_MHeap_Scavenger + yeah, main.main is the code of the program, (the one the + user wrote, not the runtime) + yeah, it becomes a gc thread + seeing as runtime_starttheworld reports that there is + already one gc thread + and how much are __pthread_total and __pthread_num_threads for + that trace ? + they were: __pthread_total = 2, and __pthread_num_threads = + 4 + can you paste the assertion again please, just to make sure + a.out: ./pthread/pt-create.c:167: __pthread_create_internal: + Assertion `({ mach_port_t ktid = __mach_thread_self (); int ok = + thread->kernel_thread == ktid; + __mach_port_deallocate ((__mach_task_self + 0), ktid); ok; + })' failed. + btw, install the -dbg packages too + dbg for which one? gccgo? + libc0.3 + pthread/pt-create.c:167 is __pthread_sigstate (_pthread_self (), + 0, 0, &sigset, 0); here :/ + that assertion should be in __pthread_thread_start + let's just say gdb is confused + braunr: apt-get source eglibc ; cd eglibc-* ; debian/rules patch + pinotree: i have + and that assertion can only trigger if __pthread_total is 1 + so let's say it just got to 2 + it does from very early on in process initialisation + let me check this out again + hm + actually, both __pthread_total and __pthread_num_threads must be 1 + the context functions might be fine actually + braunr: __pthread_num_threads = 2 right from the start of + the program + 0x01da48ec is in mach_msg_trap + something happened with libpthreads recently .. + i can't even start iceweasel + braunr: what's the error? + iceweasel: ./pthread/../sysdeps/generic/pt-mutex-timedlock.c:70: + __pthread_mutex_timedlock_internal: Assertion `__pthread_threads' failed. + +But not the [[open_issues/libpthread_dlopen]] issue? + + considering __pthread_threads is a global variable, this is tough + i wonder if that's the issue with nlightnfotis's work + wrong symbol resolution, leading libpthread to consider there is + only one thread running + try with LD_PRELOAD=/lib/i386-gnu/libpthread.so.0 iceweasel + same + maybe the switch to glibc 2.17 + this assertion is triggered by __pthread_self, assert + (__pthread_threads); + __pthread_threads being the array of thread pointers + so either corrupted (but we hardly changed anything ...) or wrong + resolution + __pthread_num_threads includes the signal thread, __pthread_total + doesn't + braunr: I recompiled with the libc debugging symbols and I + have new information + the threads block at mach_msg_trap + again, almost everything blocks there + mach_msg is mach ipc, the way hurd system calls are implemented + and the next calls (if it didn't block, from what I can see + from eip) are mach_reply_port and mach_thread_self + please paste it + yes give me 2 mins plz, brb + pinotree: looks different for firefox + it seems it calls pthread_key_create before pthread_create + something our libpthread doesn't handle correctly + braunr: http://pastebin.com/yNbT7nLn + braunr: what do you mean? 
+ pinotree: i mean libpthread needs to be fixed so thread-specific + data can be set even without a call to pthread_create + nlightnfotis: hum, we already knew it was blocking in a semaphore + nlightnfotis: ok forget the other things i told you to test + nlightnfotis: track __pthread_total and __pthread_num_threads + add prints (again, with mach_print) to see when (and why) they + change and go back to 1 + braunr: i see that pthread_key_create uses a mutex which in + turns needs _pthread_self(), but shouldn't at least one pthread_create be + done (directly by libc for the main thread)? + pinotree: no :) + well + it should have been for the signal thread indeed + and the signal thread exists + and the main thread? + not the main, no + how so? + a simple test program shows it does indeed work .. + so this is again another problem in firefox too + braunr: I don't think I understand this. I mean how can + pthread_total and __pthread_num_thread turn to 1, when , right before and + right after the crash they have numbers between 2, 3, and 4? + how did you get their values "right before" the crash ? + I have set a breakpoint to a printing function right before + the go statement + (right before in this context, in the application code, not + the runtime code, but then again, I don't really think they are too far + each other) + well, that's the mystery + I am not challenging what you said, I will of course do, + just asking to understand some things + they may either turn to 1, or there is some mess with symbol + resolution leading threads to see a value of 1 + *do it + there* + braunr: ping + just ask ;) + teythoon: have you used mach_print? + no + I have some questions about it + ask them + I was told to use them inside go's runtime, to print the + values of __pthread_total and __pthread_num_threads. The thing is, these + values (I believe) are unknown to the runtime, they are only known to the + executable (linking time and later) + so? if the requested information is bound to a symbol that is + resolved at link time, you can print it from within the runtime + the same way any function from the libc is not known to the + executable until linking against it, but you can still "use" it in your + executable + yeah, ok I understand that, but these are references that + are resolved at link time. The values I want to print are totally unknown + to the runtime (0 references to them) + if the value you are interested in is bound to the symbol + __pthread_total at link time, then you've got a reference you can use + doesn't printing __pthread_total work? did you try that? + no, whenever I printed these values I did it from gdb. I am + trying to do what you suggested atm + nlightnfotis: im here + printing those values from libgo will tell us what value libgo + actually sees + I am trying to use mach_print. Could you give me some + pointers on its usage (inside the goruntime?) (I have already read your + document here + http://www.gnu.org/software/hurd/microkernel/mach/gnumach/interface/syscall/mach_print.html + and the example code) + and symbol resolution may depend on where it's done from + nlightnfotis: first, it only work with -dbg kernels + so make sure you're running one + actually, i'll write you a patch + including a mach_printf function with argument parsing + isn't it on by default? 
I read that on the document you are + discussing mach_printf + ahh ok + it's on by default on -dbg kernels + i'll make a repository on darnassus too + better store it there + nlightnfotis: + http://darnassus.sceen.net/gitweb/rbraun/mach_print.git/ + nlightnfotis: i suggest you implement mach_print with inline asm + statement in a C file, so that you don't need to alter the build system + configuration + i'll make an example of that too + braunr: that wasn't a problem. My only real problem atm is + that __atomic_t isn't recognised as a type, and I can not find the header + file for it on Hurd + it was pt-internal.h in libpthread + ah + nlightnfotis: just in case, i updated the repository with an + inline assembly version + let's see about __atomic_t + sysdeps/i386/bits/pt-atomic.h:typedef __volatile int __atomic_t; + nlightnfotis: just redeclare it as this locally + nlightnfotis: ok ? + I am working on it, because I still haven't found what + __atomic_t is typedefed from. Thinking of typedefing an int to it and see + how it goes + braunr: found it just now: __volatile int + "just now" ? + 14:19 < braunr> sysdeps/i386/bits/pt-atomic.h:typedef __volatile + int __atomic_t; + I was using cscope all this time + why use cscope at all when i tell you where it is ? + because I didn't notice it: your discussion was between + pino's and srs' and I wasn't tagged and thought it had something to do + with their discussion + (sorry) + no it was my bad + ok + pinotree: there is indeed a special call to + __pthread_create_internal for the main thread + yeah + braunr: if there wouldn't be that libc→pthread bridge, things + like pthread_self() or so wouldn't work for the main thread + pinotree: right + braunr: weird thing is that the error you got is usually a sign + that pthread is not linked in explicitly + pinotree: yes + pinotree: with firefox, gdb can't locate pthread symbols before a + call to a pthread function + so yes, libpthread is loaded after main is called + nlightnfotis: can you give me a quick procedure to build gcc with + go support from your repository, and then test a go program please ? + to i can have a better look at it myself + so* + braunr: sure you want access to my go repo? If you already + have gcc repo add my github repo as a remote and checkout + fotisk/goruntime_hurd + i have your github repo + git checkout fotisk/goruntime_hurd (You may need to revert a + commit or two, because of my latest endeavour with mach_print + braunr: check it out now, I reverted some messy commits for + you to rebuild + nlightnfotis: i won't work on it right now, i'm building glibc to + check some things in libpthread + since it seems to be the source of your problems and many others + oh ok then. btw, it compiles ok, but when I try to compile + another program with gccgo collect2 cries about undefined references to + __pthread_num_threads and __pthread_total + Oo + another program ? + braunr: will I get the same result if I slowly go through it + with gdb + yep + i don't understand + what compiles ok, what fails ? + gccgo compiles without errors (which is strange) but when I + use it to compile goroutine.go it fails with the errors I reported + (missing linking to pthread?) + since when ? + pinotree: perhaps braunr: since I made the changes with + mach_print + pinotree: but what could be missing the link? GCC compiled + programs are getting linked automatically to the shared objects of the + headers they include right? 
+ (assuming it's not a huge program, only a tiny 10 liner for + instance) + uh + did you declare them as extern + ? + yes + do you see -lpthread on the link line ? + during gcc's compilation? I will have to rerun it again and + see. + log the compilation output somewhere once + nlightnfotis: why did you remove volatile from the definition of + __atomic_t ?? + just for testing purposes, because I thought that the GNU + version is volatile with no __ in front of it and that might cause some + issues. + i don't understand + it was just an experiment gone wrong + nlightnfotis: keep volatile there + just did + braunr: there is -lpthread on some lines. For instance when + libtool is invoked. + braunr: the pthread assertion usually happens when libpthread gets + loaded from a plugin, I guess mozilla got rid of libpthread in the main + application recently, simply + youpi: he said that the LD_PRELOAD trick (which used to + workaround the issue in older iceweasel) does not work, though + ah? it does work for me + dunno then... + youpi: aouch, ok + nlightnfotis: what about the specific gcc invocation that fails ? + pinotree: /lib/i386-gnu/libpthread.so.0: ERROR: cannot open + `/lib/i386-gnu/libpthread.so.0' (No such file or directory) + trying with a working path this time + better + sorry, i typed it by hand :p + Segmentation fault + but no assertion + braunr: gccgo hello.go + nlightnfotis: ? + nlightnfotis: what about the specific gcc invocation + that fails ? + nlightnfotis: i'm asking if -lpthread is present when you have + these undefined reference errors + it is. it seems so + I wrote above that it is present when libtool is called + I don't know what libtool is doing sadly + you said some lines + but I from what I've seen I believe it does some kind of + linking + paste it somewhere please + yeah it doesn't fail though + that's far too vague ... + it doesn't fail ? + give me a second + i thought it did + no it doesn't + 14:53 < nlightnfotis> gccgo compiles without errors (which is + strange) but when I use it to compile goroutine.go it fails with the + errors I reported + yeah gccgo compiles. + when I use the compiler, it fails + so it fails running + is gccgo built with -lpthread itself ? + http://pastebin.com/1TkFrDcG + check it out + I think it does, but I would take an extra opinion + line 782 + and 784 + (are you building as root ?) + yes. for now + baaad :p + I never had any particular problems...except that one time + that I rm -rf the source tree :P + I know it's bad d/w + braunr: I found something interesting (I don't know if it's + expected or not; probably not): If I set GOMAXPROCS to 2, and run the + goroutine program, it seems to be running for a while (with the + goroutines!) and then it segfaults. Will look more into it + it's interesting, yes + nlightnfotis: have you tried the preload trick too ? + ldpreload? no. Could you tell me how to do it? export + LDPRELOAD and a path to libpthread? + nlightnfotis: LD_PRELOAD=/lib/i386-gnu/libpthread.so.0.3 ... + braunr: it also produces a very different backtrace. This + one heavily involves mig functions + braunr, nlightnfotis: Thanks for working together, and sorry + for my lack of time. + nlightnfotis: paste please + tschwinge, Hello. It's ok, I am sorry for not showing good + amounts of progress from my part. + braunr: http://pastebin.com/J4q2NN9p + nlightnfotis: thread apply all bt full please + braunr: http://pastebin.com/tbRkNzjw + looks like an infinite loop of + __mach_port_mod_refs/__mig_dealloc_reply_port + ... 
+ yes that's what I got from it too. Keep in mind these + results are with GOMAXPROCS=2 and they result in segmentation fault + and I also can not understand the corrupted stack at the + beginning of the backtrace + no please + ? + test LD_PRELOAD=/lib/i386-gnu/libpthread.so.0.3 without + GOMAXPROCS=2 + braunr: LD_PRELOAD without GOMAXPROCS results in the usual + assertion failure and abortion of execution after it + nlightnfotis: ok + nlightnfotis: im sorry, i thought you couldn't launch a test since + you added mach_print + I am not using mach_print, I couldn't fix the issue with the + references and thought I was losing time, so I went back to debugging + with gdb until I can't get anything more out of it + braunr: should I focuse on mach_print? Will it produce very + different results than gdb? + *focus + (btw I didn't delete mach print or anything, it's still + there, in another branch) + braunr: Now I stepped through the program in gdb, and got + something really really weird. Some close to a full execution + Number of gorountines and machine threads according to + runtime was 3, __pthread_num_threads was 4 + it did get SIGILL (illegal instruction some times though) + and it exited with code 02 + uh + nlightnfotis: try with mach_print yes, it will show the values + from the real execution context, and be as close as what we can get + i'm not sure about how gdb finds the values + braunr: ok, will spend the rest of the day to find a way to + make mach_print and the other values work. Did you see my last messages, + with the goroutines that worked under gdb? + yes + it seemed to run. Didn't get the expected output, but also + didn't get any errors other than illegal instruction either + braunr: I still have not found an easy way to do what you + asked me to from go's runtime. Would it be ok if I do it from inside + libpthread? + nlightnfotis: do what ? + print the values of __pthread_total and + __pthread_num_threads with mach_print. + how ? + oh wait + well yes ofc, they're not exported :/ + nlightnfotis: have you been able to use mach_print ? + braunr: not really because of the problems I shared + earlier. I can try to use with in-gcc structures if you want me to, it's + nothing hard to do + actually I will. Hang on + proceed with debugging inside libpthread instead + using mach_print to avoid deadlocks this time + (mach_print was purposely built for debugging such low level code + parts) + ok, I will patch this, but can I build it tomorrow? + yes + just keep us informed + ok, thanks, and sorry for everything I have done. I want you + to know that I really appreciate that you are helping me. + remember: the goal here is to understand why __pthread_total and + __pthread_num_threads have inconsistent values + braunr: whenever you see it, mach_print works as expected + inside gcc. + + +# IRC, freenode, #hurd, 2013-09-03 + + braunr: I have made the changes I want to glibc. After I + build it, how do I install it? make install or is it more involved? + nlightnfotis: use LD_LIBRARY_PATH + never install an experimental glibc unless you have backups or are + certain of what you're doing + nlightnfotis: i didn't understand what you meant about mach_print + yesterday + it works in gcc. + what do you mean "in gcc" ? + why would you put mach_print in gcc ? + we want it in go programs .. + yes, I understand it. gcc was the fastest way to test it's + usage at that moment (for me) and I just wanted to confirm it works. 
I + only had to change its signature to const char * because gcc wouldn't + accept it otherwise + doesn't my example include const ? + nlightnfotis: why did you rebuild glibc ? + braunr: I have not started yet, will do now, to apply the + changes to libpthread + you mean add the print calls there ? + yes + ok + use debian/rules build, interrupt when you see gcc invocations + then switch to the build directory (hurd-libc-i386 iirc), and make + others + nlightnfotis: did you send me the instructions to build and test + your work ? + so i can reproduce these weird threading problems at my side + braunr: sorry, I was in the toilet, where would you like me + to send the instructions? + nlightnfotis: i should be fine i guess, let's check here + nlightnfotis: i simply used configure + --enable-languages=c,c++,go,lto + and i'll see how it goes + I configure with --enable-languages=go (it automatically + builds c and c++ for that as go depends on them), --disable-bootstrap, + and use a custom prefix to install at a custom location + yes + ok + nlightnfotis: how long does it take you ? + complete non-bootstrap build about 45 minutes. With a build + tree ready and only simple changes, about 2-3 minutes + braunr: In an hour I will go offline for 2-3 hours, I am + gonna move back to my other home in the other city. It won't take long, + the whole process will be about 4 hours, and I will compensate for the + time lost by staying up late up until 3 o clock in the morning + i'd prefer you didn't "compensate" + ? + work if you want to + noone if forcing you to work late at night for gsoc, unless you + want to + no, I do it because I want to. I **really** really want to + succeed, and time is off the essence for me at this point + then ok + nlok i have a gccgo compiler + nlok? + nl being nlightnfotis but he's gone + oh + * pinotree was trying to parse that as "now" or "look" or the like + braunr: 08:19:56< braunr> use debian/rules build, interrupt + when you see gcc invocations: Are gcc invocations related to + i486-gnu-gcc-4.7? + nvm I'm good now :) + of course not, that's only for compiling applications using the + newly built libc + gnu_srs: I didn't exactly understand what you said? Care to + elaborate? which one is for compiling applications using the newly build + libc? -486-gnu-gcc-4.7? + when you see gcc ... -llibc.so you know libc.so is built, and + that is sufficient to use it. + with LD_PRELOAD or LD_LIBRARY_PATH (after cding and building + others) + gnu_srs: thanks for the tip :) + :-D + is anyone else getting glibc build problems? (from apt-get + source glibc, at cxa-finalize.c)? + apt-get source eglibc; apt-get build-dep eglibc (as root); + dpkg-buildpackage -b ... + nlightnfotis: just debian/rules build + to start the glibc build + braunr: oh I have now, it's building without issues so far + when you see gcc processes, it means the build process has + switched from configuring to making + then interrupt (ctrl-c) + cd build-tree/hurd-i386-libc + make others + or make lib others + lib is glibc, others is some addons which include our libpthread + thanks for the tip braunr. + braunr: I have managed to get a working version of glibc and + libpthread with mach_print working. I have also run 2 test programs and + it works as expected. Will continue researching tomorrow if that's ok + with you, I am too tired to keep on now. 
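
The kind of libpthread patch being described probably looks something like
the following: format the two counters into a buffer on the stack and hand
it to mach_print (the gnumach debugging trap available on -dbg kernels).
This is a sketch of the approach, not the patch that was actually built; the
mach_print prototype and the dump_thread_counts helper are assumptions here.

    #include <stdio.h>   /* snprintf */

    /* Provided by the -dbg gnumach setup discussed above; the exact
       declaration is an assumption.  */
    extern void mach_print (const char *s);

    /* Both counters live in libpthread itself (declared there as __atomic_t,
       i.e. volatile int, per pt-atomic.h quoted above), so this helper is
       meant to be dropped into libpthread, where the symbols are visible.  */
    extern volatile int __pthread_total;
    extern volatile int __pthread_num_threads;

    static void
    dump_thread_counts (const char *where)
    {
      char buf[128];   /* stack buffer: no malloc, which takes pthread locks */

      snprintf (buf, sizeof buf,
                "%s: __pthread_total=%d __pthread_num_threads=%d\n",
                where, __pthread_total, __pthread_num_threads);
      mach_print (buf);
    }

Calling such a helper from the thread creation and exit paths is then enough
to see when either counter drops back to 1.
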
+ for the record compilation of glibc right from the start was + about 1 hour and 20 - 30 minutes + + +# IRC, freenode, #hurd, 2013-09-04 + + i've taken a deeper look at this assertion failure + and ... + it has nothing to do with pthread_create + i assumed it was the one in sysdeps/mach/pt-thread-start.c + pthread_self ()? + but it's actually from sysdeps/mach/hurd/pt-sysdep.h, in + _pthread_self() + and looking there : + thread = *(struct __pthread **)__hurd_threadvar_location + (_HURD_THREADVAR_THREAD); + so simply put, context switching doesn't fix up thread specific + data ... + it's that simple + wow + today I was running programs all day long with mach_print on + to print __pthread_total and __pthread_num_threads to see when both + become 1 and couldn't find anything + I was nearly desperate. You just made my day! :) + now the problem is + thread specific data is highly dependent on the stack + it's illegal to make a thread switch stack and expect it to keep + working on the hurd + unless split stack is activated? + no wait + split stack is completely unsupported on the hurd + uh, why would that be? + teythoon: about split stack ? + yes + i'm not sure + at least now we do know what the problem is and I can start + working on a solution. + braunr: we should tell tschwinge and youpi about it. + nlightnfotis: sure but + nlightnfotis: you can also start looking at a workaround + nlightnfotis: also, let's makre sure that's the reason first + nlightnfotis: use mach_print to display the stack pointer when + switching + nlightnfotis: + http://stackoverflow.com/questions/1880262/go-forcing-goroutines-into-the-same-thread + " I believe runtime.LockOSThread() is necessary if you are + creating a library binding from C code which uses thread-local storage" + oh, a paper about the go runtime scheduler + let's have a look .. + braunr: have you seen the high level overview presented in that + blog post I once posted here? + no + braunr, just came back, and read the log. Which paper are + you reading? The one from columbia university? + but i need to know about details here, specifically, if threads do + change stack + nlightnfotis: yes + braunr: ok + this could be caused either by true stack switching, or by "stack + segmentation" as implemented by go + it is interesting that there are stack related members per + goroutine + nlightnfotis: in particular, pthread_attr_setstacksize() doesn't + work on the hurd + it is interesting that there are stack related + members per goroutine -> I think that's go's policy. All goroutines run + on a shared address space (that is the kernel thread's address space) + nlightnfotis: that's obvious + and not the problem + and yes, it's "stack segmentation" + and on linux, and probably other archs, switching stack may be + perfectly legit + on the hurd, we still have threadvars + which are the hurd specific thread local storage mechanism + it means 1/ all stacks in a process must have the same size + 2/ stack size must be a power of two + 3/ threads can't switch stack + this hardly prevents goroutines from being run by just any thread + i see there already hard hurd specific changes about stack + handling + so we should only make changes to the specific gccgo + scheduler as a workaround under the Hurd right? + i don't know + this might also push the switch to tls + this sounds better as a long term fix + but it must also involve a great amount of work, right? + most of it has already been done + by youpi and tschwinge + with the changes to tls early in the summer? 
+ maybe + 14:36 < braunr> nlightnfotis: also, let's makre sure that's the + reason first + 14:36 < braunr> nlightnfotis: use mach_print to display the stack + pointer when switching + check what goes wrong with the stack + then we'll see + as a very simple workaround, i expect locking g's on m's to be a + good first step + braunr: noted everything. that's my work for tonight. I + expect myself to stay up late like yesterday and have this all figured + out by tomorrow. + nlightnfotis: why not now ? + I am starting from now, but I expect myself to stop about 6 + o clock here (2 hours) because I have an appointment with a doctor. + and keep on when I come back home + well adding a few printfs to track the stack should be doable + before 2 hours + braunr: I am doing it now. Will report as soon as I have + results :) + braunr: have I messed up with the way I read esp's value? + https://github.com/NlightNFotis/glibc/commit/fdab1f5d45a43db5c5c288c4579b3d8251ee0f64#L1R67 + nlightnfotis: +unsigned + nlightnfotis: using gdb : + (gdb) info registers + esp 0x203ff7c0 0x203ff7c0 + (gdb) print thread->stackaddr + $2 = (void *) 0x2000000 + oh yes, I know about gdb, I thought you wanted me to use + mach_print + nlightnfotis: yes + this is just my own attempt + and it does show the stack pointer is completely outside the + thread stack + nlightnfotis: in your code, i suggest using + __builtin_frame_address() + well __builtin_frame_address(0) + see + http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/Return-Address.html#Return-Address + it's not exactly the stack pointer but close enough, unless of + course the stack is changed in the middle of the function + I see. I am gonna try one more time with esp the way I + worked it and if it fails to work, I am gonna use return address + nlightnfotis: be very careful about signed/unsigned and type + widths + not return address, frame address + return address is code, frame address is data (stack) + ah, I see, thanks for the correction. + youpi: not sure you catched it earlier, the problem fotis has been + having with goroutines is about threadvars + simply put, threads use setcontext functions to save/restore + goroutines state, which make them switch stack, rendering the location of + threadvars invalid, and making _pthread_self() choke + + +# IRC, freenode, #hurd, 2013-09-05 + + I am having very weird behavior with my code, something that + I can not explain and seems likely to be a bug, could someone else take a + look? + pinotree are you available at the moment to take a look at + something? + nlightnfotis: dont ask to ask, just ask + I have made some modifications to pthread_self as also + suggested by braunr to see if the stack pointer is within the bounds of + the frame address after context switching. I can get the values of both + esp and frame_address to be shown before the context switch, but I can + only get the value of esp to be shown after the context switch, and it + always results to the program getting killed + + https://github.com/NlightNFotis/glibc/blob/7e72da09a42b1518865f6f4882d68689e681f25b/libpthread/sysdeps/mach/hurd/pt-sysdep.h#L97 + thing is a dummy print value I have right after the code + that was supposed to print the frame_address after the context switching + is executing without any issues. + oh assembler... cannot help, sorry :/ + oh no, I am not asking for assembler help, that part works + quite alright. I am asking why from the 4 identical pieces of code that + print debugging values the last one doesn't work. 
I am on it all day, and + still have not found an answer + nlightnfotis: i can + hello braunr, + nlightnfotis: do you have a backtrace ? + uh + nope, it crashes right after I execute something. Let me + compile glibc once again and see if a fix I attempted works + malloc and free use locks + so they probably use _pthread_self + don't use them + for debugging, a simple statically allocated buffer on the stack + will do + nlightnfotis: so ? + Ι got past my original problem, but now I am trying to get + past the sigkills that kill the program at the beginning + i remember not having this problem, so I am compiling my + master branch to see if it is reproducible. If it is, it means something + is very wrong. If it's not, it means I screwed up somewhere + i don't understand, how do you know if you get past the problem if + you still have trouble reaching that code ? + braunr: I fixed all my problems now. I can see that both esp + and the frame_address are the same after context switching though? + always ? + for all goroutines ? + for all kernel threads, not go routines. We are in + libpthread + if they're the same after a context switch, it usually means the + scheduler didn't switch + well obviously + but what i asked you was to trace calls to setcontext functions + I will run some tests again. May I show you my code to see + if there is anything wrong with it? + what address do you have ? + not yet + i'm not sure you understand what i want to check + do you see how threadvars work basically ? + I think so yes, they keep in the stack the local variables + of a thread right? + and the globals + or + wait a minute... + yes but do you see how the thread specific data are fetched ? + with __hurd_threadvar_location_from_sp? + yes but "basically", what does it do ? + it get's a stack pointer as a parameter, and returns the + location of that specific data based on that stack pointer, right? + and how ? + I believe it must compare the base value of the stack and + the value of the end of the stack, and if the results are consistent, it + returns a pointer to the data? + and how does it determine the start and end of the stack ? + stack_pointer must be pointing at the base of the + stack. That + stack_size must be the stack limit I guess. + so you're saying the caller of __hurd_threadvar_location_from_sp + knows the stack base ? + I am not so sure I understand this question. + i want to know if you understand how threadvars work + apparently you don't + the caller only has its current stack pointer + which does *not* point to the stack base + threadvars work by assuming a *fixed* stack size, power of two, + aligned (obviously) + in our case, 2MiB (except in hurd servers where a kludge reduces + that to 64k) + this is why stack size can't be changed + this is also why the stack pointer can't ever point outside the + initial stack + i want you to make sure go violates this last assumption + so 1/ show the initial stack boundaries of your threads, then show + that, after loading a goroutine, the stack pointer is outside + which is what, if i'm right, triggers the assertion + ask if there is anything confusing + this is important, it should already have been done + ok, I noted it all, I am starting to work on it right now. I + only have one question. My results, the ones with the stack pointer and + the frame address, are expected or unexpected? 
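The verification asked for above fits in a few lines: compute the stack base that the threadvar machinery would infer by masking the stack pointer with the fixed, power-of-two stack size, and compare it with the thread's actual stack. A standalone sketch follows; the 2 MiB constant matches the size mentioned above, and in glibc the equivalent computation is the one in __hurd_threadvar_location_from_sp (hurd/threadvar.h):

    /* Standalone illustration of the threadvar assumption discussed
       above: with a fixed, power-of-two stack size, masking any pointer
       into the stack recovers the stack base.  Illustrative only; for
       the initial thread the printed base is not meaningful, the
       invariant applies to stacks set up by libpthread.  */
    #include <stdint.h>
    #include <stdio.h>

    #define STACK_SIZE (2UL * 1024 * 1024)   /* 2 MiB, as mentioned above */

    /* The stack base as the threadvar lookup would infer it.  */
    static uintptr_t
    presumed_stack_base (uintptr_t sp)
    {
      return sp & ~(STACK_SIZE - 1);
    }

    int
    main (void)
    {
      /* Close enough to the stack pointer for this purpose.  */
      uintptr_t sp = (uintptr_t) __builtin_frame_address (0);

      printf ("sp = %#lx, presumed stack base = %#lx\n",
              (unsigned long) sp, (unsigned long) presumed_stack_base (sp));

      /* Once the Go runtime switches a thread onto a goroutine's own
         stack segment, masking the new stack pointer no longer yields
         the original base, so _pthread_self() dereferences a bogus
         location, which matches the assertion failure above.  */
      return 0;
    }

Printing these two values from inside libpthread before and after the runtime switches stacks, as suggested above, is enough to confirm or rule out the violation.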
+ i don't know + show me the code again please + and explain your intent + + https://github.com/NlightNFotis/glibc/blob/7fe202317db4c3947f8ae1d1a4e52f7f0642e9ed/libpthread/sysdeps/mach/hurd/pt-sysdep.h + At first I print the value of esp and the frame_address + before the context switching and after the context switching. + The different variables were introduced as part of a test to + see if my results were consistent, + what context switch ? + in hurd_threadvar_location + what makes you think this is a context switch ? + in threadvar.h, it calls __hurd_threadvar_location_from_sp. + the full path for it is glibc/hurd/hurd/threadvar.h + i don't see how giving me the path will explain why it's a context + switch + and i can tell you right away it's not + hurd_threadvar_location is basically a lookup returning the + address of the thread specific data + wait a minute...does this mean that + hurd_threadvar_location_from_sp is also a lookup function for the same + reason + ? + yes + isn't the name meaningful enough ? + "location of the threadvars from stack pointer" + I guess I made wrong deductions from when you originally + shared your findings... + thread = *(struct __pthread + **)__hurd_threadvar_location (_HURD_THREADVAR_THREAD); + so simply put, context switching doesn't fix up + thread specific data ... + I thought that hurd_threadvar_location was doing the context + switching + nlightnfotis: by context switching, i mean setcontext functions + braunr: You mean the one in sysdeps/mach/hurd/i386? + yes + but + do you understand what i want you to check now ? + I think I got this time: Let me explain it: + You suggested that stack sizes are fixed. That is the main + reason that the stack pointer should not be able to point outside of it. + no + locating threadvars is done by applying a mask, computed from the + stack size, on the stack pointer, to determine its base + yeah, what __hurd_threadvar_location_from_sp is doing + if size is a power of two, size - 1 is a mask that, if + complemented, aligns the address + yes + so, threadvars expect the stack pointer to always point to the + initial stack + and we wanna prove that go violates this rule right? That + the stack pointer is not pointing at the initial stack + yes diff --git a/community/gsoc/project_ideas/download_backends.mdwn b/community/gsoc/project_ideas/download_backends.mdwn index f794e814..c0bdc5b2 100644 --- a/community/gsoc/project_ideas/download_backends.mdwn +++ b/community/gsoc/project_ideas/download_backends.mdwn @@ -1,12 +1,12 @@ -[[!meta copyright="Copyright © 2009 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2009, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license -is included in the section entitled -[[GNU Free Documentation License|/fdl]]."]]"""]] +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] [[!meta title="Use Internet Protocol Translators (ftpfs etc.) as Backends for Other Programs"]] @@ -19,8 +19,9 @@ Download protocols like FTP, HTTP, BitTorrent etc. 
are very good candidates for this kind of modularization: a program could simply use the download functionality by accessing FTP, HTTP etc. translators. -There is already an ftpfs translator in the Hurd tree, as well as an [httpfs -translator on hurdextras](http://www.nongnu.org/hurdextras/#httpfs); however, +There is already an [[hurd/translator/ftpfs]] translator in the Hurd tree, as +well as an [[hurd/translator/httpfs]] on +[hurdextras](http://www.nongnu.org/hurdextras/); however, these are only suitable for very simple use cases: they just provide the actual file contents downloaded from the URL, but no additional status information that are necessary for interactive use. (Progress indication, error codes, HTTP diff --git a/community/gsoc/project_ideas/mtab/discussion.mdwn b/community/gsoc/project_ideas/mtab/discussion.mdwn index 0e322c11..716fb492 100644 --- a/community/gsoc/project_ideas/mtab/discussion.mdwn +++ b/community/gsoc/project_ideas/mtab/discussion.mdwn @@ -106,7 +106,7 @@ License|/fdl]]."]]"""]] # IRC, freenode, #hurd, 2013-06-25 -In context of [[microkernel/mach/mig/documentation/structured_data]]. +In context of [[open_issues/mig_portable_rpc_declarations]]. should I go for an iterator like interface instead? btw, what's the expected roundtrip time? @@ -905,3 +905,1168 @@ In context of [[microkernel/mach/mig/documentation/structured_data]]. ah, i think so then you don't need to do it again right, I overlooked that + + +## IRC, freenode, #hurd, 2013-07-12 + + recursively traversing all translators from / turns out to be + more dangerous than I expected + ... if done by a translator bound somewhere below /... + my interpretation is that the mtab translator tries to talk to + itself and deadlocks + (and as a side effect the whole system kinda just stops...) + + +## IRC, freenode, #hurd, 2013-07-15 + + teythoon: did you discuss with braunr about returning port vs path + in fsys_get_children? + youpi: we did + as I wrote I looked at the getcwd source you pointed me at + and I started to code up something similar + but as far as I can see there's no way to tell from a port + referencing a file the directory this file is located in + ah, right, there was a [0] mail + teythoon: because it doesn't have a "..", right + about Neals concerns, he's right about not covering passive + translators very well + but the solution he proposed was similar to what I tried to do + first + I don't like half-covering passive translators at all, to be honest + :) + either covering them completely, or not at all, would be fine + and then braunr convinced me that the "recursive" approach is + more elegant and hurdish, and I came to agree with him + youpi: one could scan the filesystem at translator startup and + populate the list + by "Neal's solution", you mean an mtab registry? + yes + so, let's see what linux does when renaming parent directories + mount points you mean? 
+ teythoon: browsing the whole filesystem just to find passive + translators is costly + teythoon, braunr: and that won't prevent the user from unexpectedly + starting other translators at will + scary + youpi: but that requires the privilege to open the device + the fact that a passive translator is set is nothing more than a + user having the intent of starting a translator + linux retains the original path in the mount table + heh + youpi: any unprivileged user can trigger a translator startup + sure, but root can do that too + and expect the system to behave nicely + but if I'm root and want to fsck something, I won't start + translators accessing the device just before that + but if there's a passive translator targetting the device, + someone else might do that + root does not always completely control what he's doing + linux for instance does prevent from mounting a filesystem being + checked + but still, including passive translators in the list would at + least prevent anyone starting an translator by accident, isn't that worth + doing then? + if there's a way to prevent root too, that's better than having a + half-support for something which we don't necessarily really want + (i.e. an exclusive lock on the underlying device) + right, that would also do the trick + btw, some programs or scripts seem to hardcode /proc/mounts and + procfs and I cannot bind a translator to /proc/mounts since it is + read-only and the node does not exist + IMHO automatically starting translators is a generic feature, and + passive translator is just a specific instance of it; but we could very + well have, like an "autofs" that automatically start translators in tar + archives and iso images, allowing to cd into any tar/iso on the system; + implementing such things is part of the Hurd flexibility, the "core + system" shouldn't be too aware on how translators are started + so in the end, storing where the active translator was started + first seems okayish according to what linux has been exposing for decades + kilobug: indeed + it could serve a mounts with a passive translator by default, or + a link to /run/mtab, or an simple file so we could bind a translator to + that node + I'd tend to think that /proc/mounts should be a passive translator + and /run/mtab / /etc/mtab a symlink to it + not being to choose the translator is a concern however + ok, I'll look into that + it could be an empty file, and people be able to set a translator + on it + if it had a passive translator, people still could bind their + own translator to it later on, right? + afaics the issue currently is mostly, that there is no mounts + node and it is not possible to create one + right + cool + so with the actual path, you can even check for caller's permission + to read the path + i.e. not provide any more information than the user would be able + to get from browsing by hand + sure, that concern of Neil's is easy to address + I'm not so much concerned by stale paths being shown in mtab + the worst that can happen is a user not being able to umount the + path + but he can settrans -g it + (which he can't on linux ;) ) + yes, and the device information is still valid + yes + despite the parent dir being renamed, linux is still able to + umount the new path + and so is our current umount + good + (if one uses the mount point as argument) + what's the current plan concerning /proc/mounts ? + serving a node with a passive translator record + ? 
+ so that /hurd/mtab / is started on access + i mean, still planning on using the recursive approach instead of + a registry ? + ah + I do not feel confident enough to decide this, but I agree with + you, it feels elegant + and it works :) + modulo the translator deadlocking if it talks to itself, any + thoughts on that? + it is a non-threaded translator I guess? + currently yes + making it threaded should fix the issue + I tried to make the mtab translator multithreaded but that + didn't help + that's odd + maybe I did it wrong + i don't find it surprising + well, not that surprising :p + on what lock does it block ? + as far as i can see the only difference of hello and hellot-mt + is that it uses a different dispatcher and has lot's of locking, right? + braunr: I'm not sure, partly because that wrecked havoc on the + whole system + it just freezes + but it wasn't permanent. once i let it running and it recovered + consider using a subhurd + ah right, I ment to set up one anyway, but my first attempts + were not successful, not sure why + anyway, is there a way to prevent this in the first place? + if one could compare ports that'd be helpful + Mmm, did you try to simply compare the number? + with the bootstrap port I presume? + Mmm, no, the send port and the receive port would be different + no, with the receive port + ah + comparing the numbers should work + youpi: no they should be the same + braunr: ah, then it should work yes + that's why there are user ref counts + ok + only send-once rights have their own names + btw, I'll push my work to darnassus from now on, + e.g. http://darnassus.sceen.net/gitweb/?p=teythoon/hurd.git;a=shortlog;h=refs/heads/feature-mtab-translator-v3-wip + + +## [[open_issues/libnetfs_passive_translators]] + + +## IRC, freenode, #hurd, 2013-07-16 + + which port is the receive port of a translator? I mean, how is + it called in the source, there is no port in sight named receive anywhere + I looked. + teythoon: what is the "receive port of a translator" ? + braunr: we talked yesterday about preventing the mtab deadlock + by comparing ports + I asked which one to use for the comparison, youpi said the + receive port + i'm not sure what he meant + it could be the receive port used for the RPC + but i don't think it's exported past mig stub code + weird, I just reread it. I asked if i should use the bootstrap + port, and he said receive port, but it might have been addressed to you? + you were talking about send and receive ports being singletons + or not + umm + no i answered him + he was wondering if the receive port could actually be used for + comparison + i said it can + but still, i'm not sure what port + if it's urgent, send him a mail + no, my pipeline is full of stuff I can do instead ;) + :) + + +## IRC, freenode, #hurd, 2013-07-17 + + braunr: btw, comparing ports solved the deadlock in the mtab + translator rather easily + :) + which port then ? + currently I'm stuck though, I'm not sure how to address Neals + concern wrt to access permission checks + I believe it's called control port + ok + the one one gets from doing the handshake with the parent + i thought it was the bootstrap port + but i don't know the details so i may be wrong + anyway + yes + what is the permission problem again ? 
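As an aside on the port comparison that resolved the deadlock above: it works because a task that already holds the receive right for a port gets any send right to that same port under the same name (only send-once rights get names of their own, as noted above), so "this child translator is myself" reduces to an integer comparison. A small self-contained demonstration of that naming property, not code from the mtab translator itself:

    /* Demonstrates that a send right to a port whose receive right the
       task already holds shows up under the same name, which is what the
       self-reference check discussed above relies on.  */
    #include <mach.h>
    #include <stdio.h>

    int
    main (void)
    {
      mach_port_t port;
      kern_return_t err;

      err = mach_port_allocate (mach_task_self (),
                                MACH_PORT_RIGHT_RECEIVE, &port);
      if (err)
        return 1;

      /* Acquire a send right; it coalesces onto the existing name
         instead of creating a new one.  */
      err = mach_port_insert_right (mach_task_self (), port, port,
                                    MACH_MSG_TYPE_MAKE_SEND);
      if (err)
        return 1;

      printf ("receive and send right share the name %u\n",
              (unsigned int) port);
      return 0;
    }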
+ 871u73j4zp.wl%neal@walfield.org + well, you could perform a lookup on the stored path + as if opening the node + if I look at any server implementation of a procedure from + fs.defs (say libtrivfs/file-chmod.c [bad example though, that looks wrong + to me]), there is permission checking being done + any server implementation of a procedure from fsys.defs lacks + permission checks, so I guess it's being done somewhere else + i must say i'm a bit lost in this discussion + i don't know :/ + can *you* sum up the permission problem please ? + i mean here, now, in just a few words ? + ok, so I'm extending the fsys api with the get_children + procedure + that one should not return any children x/y if the user doing + the request has no read permissions on x + really ? + why so ? + the same way ls x would not reveal the existence of y + i could also say unlike cat /proc/mounts + i can see why we would want that + i also can see why we could let this behaviour in place + let's admit we do want it + true, but I thought this could easily be addressed + what you could do is + now I'm not sure b/c I cannot even find the permission checking + code for any fsys_* function + for each element in the list of child translators + perform a lookup on the stored path on behalf of the user + and add to the returned list if permission checks pass + teythoon: note that i said lookup on the path, which is an fs + interface + i assume there is no permission checking for the fsys interface + because it's done at the file (fs) level + i think so too, yes + sure, if I only knew who made the request in the first place + the file-* options have a convenient credential handle passed in + as first parameter + s/options/procedures/ + surely the fsys-* procedures also have a means of retrieving + that information, I just don't know how + mig magic + teythoon: see file_t in hurd_types.defs + there is the macro FILE_INTRAN which is defined in subdirectories + (or not) + ah, retrieving the control port requires permissions, and the + fsys-* operations then operate on the control port? + see libdiskfs/fsmutations.h for example + uh yes but that's for < braunr> i assume there is no permission + checking for the fsys interface because it's done at the file (fs) level + i'm answering < teythoon> sure, if I only knew who made the + request in the first place + teythoon: do we understand each other or is there still something + fuzzy ? + braunr: thanks for the pointers, I'll read up on that a bit + later + teythoon: ok + + +## IRC, freenode, #hurd, 2013-07-18 + + braunr: back to the permission checking problem for the + fsys_get_children interface + I can see how this could be easily implemented in the mtab + translator, it asks the translator for the list of children and then + checks if the user has permission to read the parent dir + but that is pointless, it has to be implemented in the + fsys_get_children server function + yes + why is it pointless ? + because one could circumvent the restriction by doing the + fsys_get_children call w/o the mtab translator + uh no + you got it wrong + what i suggested is that fsys_get_children does it before + returning a list + the problem is that the mtab translator has a different identity + from the users accessing it + yes, but I cannot see how to do this, b/c at this point I do not + have the user credentials + get them + how? 
+ 16:14 < braunr> mig magic + 16:15 < braunr> teythoon: see file_t in hurd_types.defs + 16:16 < braunr> there is the macro FILE_INTRAN which is defined in + subdirectories (or not) + 16:16 < braunr> see libdiskfs/fsmutations.h for example + i saw that + is there a problem i don't see then ? + i suppose you should define FSYS_INTRAN rather + but the idea is the same + won't that change all the function signatures of the fsys-* + family? + that's probably the only reason not to implement this feature + right now + then again, that change is probably easy and mechanic in nature, + might be an excuse to play around with coccinelle + why not + if you have the time + right, if this can be done, the mtab translator (if run as root) + could get credentials matching the users credentials to make that + request, right? + i suppose + i'm not sure it's easy to make servers do requests on behalf of + users on the hurd + which makes me wonder if the mtab functionality shouldn't be + implemented in glibc eheheh .... + but probably not + well, I'll try out the mig magic thing and see how painful it is + to fix everything ;) + good luck + honestly, i'm starting to think it's deviating too much from your + initial goal + i'd be fine with a linux-like /proc/mounts + with a TODO concerning permissions + ok, fine with me :) + confirm it with the other mentors please + we have to agree quickly on this + y? + + braunr: I actually believe that the permission issue can be + addressed cleanly and unobstrusively + braunr: would you still be opposed to the get_children approach + if that is solved? + the filesystem is a tree and the translators "creating" that + tree are a more coarse version of that tree + having a method to traverse that tree seems natural to me + teythoon: it is natural + i'm just worried it's a bit too complicated, unnecessary, and + out-of-scope for the problem at hand + (which is /proc/mounts, not to forget it) + + +## IRC, freenode, #hurd, 2013-07-19 + + braunr: I think you could be a bit more optimistic and + supportive of the decentralized approach + I know the dark side has cookies and strong language and it's + mighty tempting + but both are bad for you :p + + +## IRC, freenode, #hurd, 2013-07-22 + + teythoon: AIUI, you should be able to run the mtab translator as + no-user (i.e. no uid) + youpi: yes, that works fine + + teythoon: so there is actually no need to define FSYS_INTRAN, doing + it by hand as you did is fine, right? + (/me backlogs mails...) + youpi: yes, the main challenge was to figure out what mig does + and how the cpp is involved + heh :) + my patch does exactly the same, but only for this one server + function + youpi: I'm confused by your mail, why are read permissions on + all path components necessary? + teythoon: only execution normally + teythoon: to avoid letting a user discover a translator running on + a hidden directory + braunr: exactly, and that is tested + e.g. ~/home/foo is o+x, but o-r + and I have a translator running on ~/home/foo/aZeRtYuyU + I don't want that to show up on /proc/mounts + youpi: i don't understand either: why isn't execution permission + enough ? + youpi: but that requires testing for read on the *last* + component of the *dirname* of your translator, and that is tested + let me take another example :) + e.g. ~/home/foo/aZeRtYuyU is o+x, but o-r + and I have a translator running on ~/home/foo/aZeRtYuyU/foo + ergl sorry, I meant this actually: + yes, that won't show up then in the mtab for users that are not + you and not root + e.g. 
~/home/foo is o+x, but o-r + and I have a translator running on ~/home/foo/aZeRtYuyU/foo + ah + hmm, good point + ? + * braunr still confused + well, qwfpgjlu is the secret + and that is revealed by the fsys_get_children procedure + then i didn't understand the description of the call right + > + /* check_access performs the same permission check as is + normally + > + done, i.e. it checks that all but the last path components + are + > + executable by the requesting user and that the last + component is + > + readable. */ + braunr: youpi argues that this is not enough in this case + from that, it looks ok to me + the function and the documentation agree, yes + but that's not what we want + and that's where i fail to understand + again, see my example + i am + 10:43 < youpi> e.g. ~/home/foo is o+x, but o-r + ok + so the user is not supposed to find out the secret + then your example isn't enough to describe what's wron + g + checking read permission only on ~/home/foo/aZeRtYuyU will not + garantee that + ah + i thought foo was the last component + no, that's why I changed my example + hum + 10:43 < youpi> e.g. ~/home/foo is o+x, but o-r + 10:43 < youpi> and I have a translator running on + ~/home/foo/aZeRtYuyU/foo + i meant, the last foo + still, this is easily fixed + sure + just has to be :) + youpi, braunr: so do you think that this approach will work? + I believe so + i still don't see the problem, so don't ask me :) + i've been sick all week end and hardly slept, which might explain + in the example, "all but the last path components" is + "~/home/foo/aZeRtYuyU" + right ? + braunr: well, I haven't looked at the details + but be it the last, or but-last doesn't change the issue + if my ~/hidden is o-r,o+x + and I have a translator on ~/hidden/a/b/c/d/e + checking only +x on hidden is not ok + but won't the call also check a b c d ? + yes, but that's not what matters + what matters is that hidden is o-r + hm + so the mtab translator is not supposed to reveal that there is an + "a" in there + ok i'm starting to understand + so r must be checked on all components too + yes + right + to simulate the user doing ls, cd, ls, cd, etc. + well, not cd + ah + for being able to do ls, you have to be able to do cd + as an ordered list of commands + ok + agreed. can you think of any more issues? + so both x and r must be checked + so in the end this RPC is really a shortcut for a find + fsysopts + script + teythoon: I don't see any + teythoon: i couldn't take a clear look at the patch but + do you perform a lookup on all nodes ? + yes, all nodes on the path from the root to the one specified by + the mount point entry in the active translator list + let me rephrase + do you at some point do a lookup, similar to a find, on all nodes + of a translator ? + no + good + yes + iirc, neal raised that concern once + and I'll also fix settrans --recursive not to iterate over *all* + nodes either + great + :) + fsys_set_options with do_children=1 currently does that (I've + only looked at the diskfs version) + + +## IRC, freenode, #hurd, 2013-07-27 + + youpi: ah, I just found msg_get_init_port, that should make the + translator detection feasible + + +## IRC, freenode, #hurd, 2013-07-31 + + braunr: can I discover the sender of an rpc message? + teythoon: no + teythoon: what do you mean by "sender" ? 
+ braunr: well, I'm trying to do permission checks in the + S_proc_mark_essential server function + ok so, the sending user + that should be doable + I've got a struct proc *p courtesy of a mig intran mutation and + a port lookup + but that is not necessarily the sender, right? + proc is really the server i know the least :/ + there is permission checking for signals + it does work + you should look there + yes, there are permission checks there + but the only argument the rpc has is a mach_port_t refering to + an object in the proc server + yes + anyone can obtain such a handle for any process, no? + can you tell where it is exactly please ? + i don't think so, no + what? + 14:42 < teythoon> but the only argument the rpc has is a + mach_port_t refering to an object in the proc server + ah + the code you're referring to + a common way to give privileges to public objects is to provide + different types of rights + a public (usually read-only) right + and a privileged one, like host_priv which you may have seen + acting on (modifying) a remote object normally requires the latter + http://paste.debian.net/20795/ + i thought you were referring to existing code + well, there is existing code doing permission checks the same + way I'm doing it there + where is it please ? + mgt.c ? + proc/mgt.c (S_proc_setowner) for example + yes + that's different + but anyone can obtain such a reference by doing proc_pid2proc + the sender is explicitely giving the new uid + yes but not anyone is already an owner of the target process + (although it may look like anyone has the right to clear the owner + oO) + see, that's what made me worry, it is not checked who's the + sender of the message + unless i'm missing something here + ah + I am + pid2proc returns EPERM if one is not the owner of the process in + question + all is well + ok + it still requires the caller process though + what? + see check_owner + the only occurrence i find in the hurd is in libps/procstat.c + MGET(PSTAT_PROCESS, PSTAT_PID, proc_pid2proc (server, ps->pid, + &ps->process)); + server being the proc server AIUI + yes, most likely + but pid2proc describes this first argument to be the caller + process + ah but it is + ? + mig magic :p + MIGSFLAGS="-DPROCESS_INTRAN=pstruct_t reqport_find (process_t)" + \ + MIGSFLAGS="-DPROCESS_INTRAN=pstruct_t reqport_find (process_t)" + \ + ah nice + hum no + this just looks up the proc object from a port name, which is + obvious + what i mean is + 14:53 < braunr> MGET(PSTAT_PROCESS, PSTAT_PID, proc_pid2proc + (server, ps->pid, &ps->process)); + this is done in libps + which can be used by any process + server is the proc server for this process (it defines the process + namespace) + yes, but isn't the port to the proc server different for each + process? + no, the port is the same (the name changes only) + ports are global non-first class objects + and the proc server can thus tell with the lookup which process + it is talking to? + that's the thing + from pid2proc : + S_proc_pid2proc (struct proc *callerp + [...] + if (! check_owner (callerp, p)) + check_owner (struct proc *proc1, struct proc *proc2) + "Returns true if PROC1 has `owner' privileges over PROC2 (and can + thus get its task port &c)." + callerp looks like it should be the caller process + but in libps, it seems to be the proc server + this looks strange to me + yep, to me too, hence my confusion + could be a bug that allows anyone to perform pid2proc + braunr: well, proc_pid2proc (getproc (), 1, ...) 
fails with + EPERM as expected for me + ofc it does with getproc() + but what forces a process to pass itself as the first argument ? + braunr: nothing, but what else would it pass there? + 14:53 < braunr> MGET(PSTAT_PROCESS, PSTAT_PID, proc_pid2proc + (server, ps->pid, &ps->process)); + everyone knows the proc server + ok now, that's weird + teythoon: does getproc() return the proc server ? + I think so, yes + damn those distributed systems, all of their sources are so + distributed too + i suspect there is another layer of dark glue in the way + I cannot even find getproc :/ + hurdports.c:GETSET (process_t, proc, PROC) + that's the dark glue :p + ah, so it must be true that the ports to the proc server are + indeed process specific, right? + ? + well, it is not one port to the proc server that everyone knows + it is + what makes you think it's not ? + proc_pid2proc (getproc (), 1, ...) fails with EPERM for anyone + not being root, but succeeds for root + hm right + if getproc () were to return the same port, the proc server + couldn't distinguish these + indeed + in which case getproc() actually returns the caller's process + object at its proc server + yes, that is better worded + teythoon: i'm not sure it's true actually :/ + braunr: well, exploit or it didn't happen + teythoon: getproc() apparently returns a bootstrap port + we must find the code that sets this port + i have a hard time doing that :/ + isn't part of the stuff which is passed to a new process by + exec? + braunr: I know that feeling + pinotree: probably + still hard to find .. + search in glibc + braunr: exec/exec.c:1654 asks the proc server for the proc + object to use for the new process + so how much of hurd do I have to rebuild once i changed struct + procinfo in hurd_types.h? + oh noez, glibc uses it too :/ + + +## IRC, freenode, #hurd, 2013-08-01 + + I need some pointers on building the libc, specifically how to + point libcs build system to my modified hurd headers + nlightnfotis: hi + nlightnfotis: you rebuild the libc right? do you have any hurd + specific pointers for doing so? + teythoon, I have not yet rebuild the libc (I was planning + to, but I followed other courses of action) Thomas had pointed me to some + resources on the Hurd website. I can look them up for you + teythoon, here are the instructions + http://darnassus.sceen.net/~hurd-web/open_issues/glibc/debian/ + and the eglibc snapshot is here + http://snapshot.debian.org/package/eglibc/ + nlightnfotis: yeah, I found those. the thing is I changed a + struct in the hurd_types.h header, so now I want to rebuild the libc with + that header + and I cannot figure out how to point libcs build system to my + hurd headers + :/ + can you patch eglibc and build that one instead? + teythoon: put your header in the appropriate /usr/include/ dir + pochu: is there no other way? + iirc nope + teythoon: you may be able to pass some flag to configure, but I + don't know if that will work in this specific case + ouch >,< that explains why I haven't found one + check ./configure --help, it's usually FOO_CFLAGS (so something + like HURD_CFLAGS maybe) + but then you may need _LIBS as well depending on how you changed + the header... so in the end it's just easier to put the header in + /usr/include/ + teythoon: did you find the info for your libc build ? + braunr: well, i firmlinked my hurd_types.h into /usr/include/... 
+ ew + i recommend building debian packages + but the build was not successful, looks unrelated to my changes + though + I tried that last week and the process took more than eight + hours and did not finish + use darnassus + it takes about 6 hours on it + I shall try again and skip the unused variants + i also suggest you use ./debian/rules build + and then interrupt the build process one you see it's building + object files + go to the hurd-libc-i386 build dir, and use make lib others + make lib builds libc, others is for companion libraries lik + libpthread + actually building libc takes less than an hour + so once you validate your build this way, you know building the + whole debian package will succedd + succeed* + so how do I get the build system to pick up my hurd_types.h? + sorry if this is obvious to you, you might be more familiar with + debian than i am :) + patch the hurd package + append your own version string like +teythoon.hurd.1 + install it + then build libc + i'll reboot darnassus so you have a fresh and fast build env + almost a month of uptime without any major issue :) + err, but I cannot install my hurd package on darnassus, can I? I + don't think that'd be wise even if it were possible + teythoon: rebooted, enjoy + why not ? + i often do it for my own developments + teythoon: screen is normally available + teythoon: be aware that fakeroot-tcp is known to hang when pfinet + is out of ports (that's a bug) + it takes more time to reach that bug since a patch that got in + less than a year ago, but it still happens + the hurd packages are quick to build, and they should only provide + the new header, right ? + you can include the functionality too in the packages if you're + confident enough + but my latest work on the killing of essential processes issues + involves patching hurd_types.h and that in a way that breaks the ABI, + hence the need to rebuild the libc (afaiui) + teythoon: yes, this isn't uncommon + braunr: this is much more intrusive than anything I've done so + far, so I'm not so confident in my changes for now + teythoon: show me the patch please + braunr: it's not split up yet, so kind of messy: + http://paste.debian.net/21403/ + teythoon: did you make sure to add RPCs at the end of defs files ? + yes, I got burned by this one on my very first attempt, you + pointed out that mistake + :) + ok + you're changing struct procinfo + this really breaks the abi + yes + i.e. you can't do that + I cannot put it at the end b/c of that variable length array + you probably should add another interface + that'd be easier, sure, but this will slow down procfs even + more, no? + that's secondary + it won't be easier, breaking the abi may break updates + in which case it's impossible + another way would be to ues a new procinfo struct + like struct procinfo2 + but then you need a transition step so that all users switch to + that new version + which is the best way to deal with these issues imo, but this time + not the easiest :) + ok, so I'll introduce another rpc and make sure that one is + extensible + hum no + this usually involves using a version anyway + no? 
but it is likely that we need to save more addresses of this + kind in the future + in which case it will be hanlded as an independant problem with a + true solution such as the one i mentioned + it could return an array of vm_address_ts with a length + indicating how many items were returned + it's ugly + the code is already confusing enough + keep names around for clarity + ok, point taken + really, don't mind additional RPCs when first adding new features + once the interface is stable, a new and improved version becomes a + new development of its own + you're invited to work on that after gsoc :) + but during gsoc, it just seems like an unnecessary burden + ok cool, I really like that way of extending Hurd, it's really + easy + and feels so natural + i share your concern about performances, and had a similar problem + when adding page cache information to gnumach + in the end, i'll have to rework that again + because i tried to extend it beyond what i needed + true, I see how that could happen easily + the real problem is mig + mig limits subsystems to 100 calls + it's clearly not enough + in x15, i intend to use 16 bits for subsystems and 16 bits for + RPCs, which should be plenty + that limit seems rather artificial, it's not a power of two + yes it is + so let's fix it + mach had many artificial static limits + eh :D + not easy + replies are encoded by taking the request ID and adding 100 + uh + "uh" indeed + so we need an intermediate version of mig that accepts both + id+100 and dunno id+2^x as replies for id + or -id - 1 + that would completely break the abi + braunr: how so? the change would be in the *_server functions + and be compatible with the old id scheme + how do you make sure id+2^x doesn't conflict with another id ? + oh, the id is added to the subsystem id? + to obtain a global message id? + yes + ah, I see + ah, but the hurd subsystems are 1000 ids apart + so id+100 or id +500 would work + we need to make sure it's true + always true + so how many bits do we have for the message id in mach? + (mig?) + mach shouldn't care, it's entirely a mig thing + well yes and no + mach defines the message header, which includes the message id + see mach/message.h + mach_msg_id_t msgh_id; + typedef integer_t mach_msg_id_t; + well, if that is like a 32 bit integer, then allow -id-1 as + reply and forbid ids > 2^x / 2 + yes + seems reasonable + that'd give us an smooth upgrade path, no? + i think so + + +## IRC, freenode, #hurd, 2013-08-28 + + teythoon: Mmm, your patch series does not make e.g. ext2fs provide + a diskfs_get_source, does it? + + +## IRC, freenode, #hurd, 2013-08-29 + + youpi: that is correct + teythoon: Mmm, I must be missing something then: as such the patch + series introduces an RPC, but only EOPNOTSUPP is ever returned in all + cases for now? + ah + /* Guess based on the last argument. 
*/ + since ext2fs & such report their options with store last, it seems + ok indeed + it still seems a bit lame not to return that information in + get_source + yes + well, if it had been just for me, I would not have created that + rpc, but only guessing was frowned uppon iirc + then again, maybe this should be used and then the mtab + translator could skip any translators that do not provide this + information to filter out non-"filesystem" translators + guessing is usually trap-prone, yes + if it is to be used by mtab, then maybe it should be documented as + being used by mtab + otherwise symlink would set a source, for instance + while we don't really want it here + why would the symlink translator answer to such requests? it is + not a filesystem-like translator + no, but the name & documentation of the RPC doesn't tell it's only + for filesystem-like translators + well, the documentation does say "filesystem" + but it does not clearly specify that one shouldn't implement + get_source if one is not a filesystme + "If the concept of a source is applicable" works for a symlink + that could be the same for eth-filter, etc. + right + Mmm, that said it's fsys.defs + not io.defs + teythoon: it is the fact that we get EOPNOTSUPP (i.e. fsys + interface supported, just not that call), and not MIG_BAD_ID (i.e. fsys + interface not supported), that filters out symlink & such, right? + that's what I was thinking, but that's based on my + interpretation of EOPNOPSUPP of course ;) + teythoon: I believe that for whatever is a bit questionable, even + if you put yourself on the side that people will probably agree on, the + discussion will still take place so we make sure it's the right side :) + (re: start/end_code) + I'm not sure I follow + youpi: /proc/pid/stat seems to be used a lot: + http://codesearch.debian.net/search?q=%22%2Fproc%2F.*%2Fstat%22 + that does not mean that start/endcode is used, but still it + seems like a good thing to mimic Linux closely + stat is used a lot for cpu usage for instance, yes + start/endcode, I really wonder who is using it + using it for kernel thread detection looks weird to me :) + (questionable): I mean that even if you take the time to put + yourself on the side that people will probably agree on, the discussion + will happen + it has to happen so people know they agree on it + I've seen that a lot in various projects (not only CS-related) + ok, I think I got it + it's to document the reasons for (not) doing something? + something like this, yes + even if you look right, people will try to poke holes + just to make sure :) + btw, I think it's rather unusual that our storeio experiments + would produce such different results + you're right about the block device, no idea why I got a + character file there + I used settrans -ca /tmp/hello.unzipped /hurd/storeio -T + gunzip:file /tmp/hello + also I tried stacking the translator on /tmp/hello directly, + from what I've gathered that should be possible, but I failed + ftr I use the exec server with all my patches, so the unzipping + code has been removed from it + ah, I probably still have it + it shouldn't matter here, though + I agree + how would you stack it? + I've never had a look at that + I'm not sure attaching the translator to the node is done before or + after the translator has a change to open its target + right + but it could be done, if storeio used the reference to the + underlying node, no? + yes + btw, you had said at some point that you had issues with running + remap. 
Was the issue what you fixed with your patches? + * youpi realizes that he should have shown the remap.c source code during + his presentation + well, I tried to remap /servers/exec (iirc) and that failed + then again, I recently played with remap and all seemed fine + but I'm sure it has nothing to do with my patches + ok + those I came up with investigating fakeroot-hurd + and I saw that this also aplies to remap.sh + *while + yep, they're basically the same + btw, I somehow feel settrans is being abused for chroot and + friends, there is no translator setting involved + chroot, the command? or the settrans option? + I don't understand what you are pointing at + the settrans option being used by fakeroot, remap and (most + likely) our chroot + our chroot is just a file_reparent call + fakeroot and remap do start a translator + yes, but it is not being bound to a node, which is (how I + understand it) what settrans does + the point being that if settrans is being invoked with --chroot, + it does something completely different (see the big if (chroot) {...} + blocks) + to a point that it might be better of in a separate command + Mmm, indeed, a lot of the options don't make sense for chroot + + +## IRC, freenode, #hurd, 2013-09-06 + + teythoon: do you personally prefer /proc being able to implement + /proc/self on its own, or using the magic server to tell clients to + resolve those specific cases themselves ? + imho solving the "who's the sender of an rpc" could solve both + the SCM_CREDS implementation and the self case in procfs + +[[open_issues/SENDMSG_SCM_CREDS]], +[[hurd/translator/procfs/jkoenig/discussion]], *`/proc/self`*. + + pinotree: yes + but that would require servers impersonating users to some extent + and this seems against the hurd philosophy + and there was also the fact that you could create a + fake/different port when sending an rpc + to fake what ? + the sender identiy + *identity + what ? + you mean intermediate servers can do that + braunr: I don't know if I understand all the implications of + your question, but the magic server is the only hurd server that actually + implements fsys_forward (afaics), so why not use that? + teythoon: my question was rather about the principle + do people find it acceptable to entrust a server with their + authority or not + on the hurd, it's clearly wrong + but then it means you need special cases everywhere, usually + handled by glibc + and that's something i find wrong too + it restricts extensibility + the user can always change its libc at runtime, but in practice, + it's harder to perform than simply doing it in the server + braunr: then I think I didn't get the question at all + teythoon: it's kind of the same issue that you had with the mtab + translator + about showing or not some entries the user normally doesn't have + access to + this problem occurs when there is more than one server on the + execution path and the servers beyond the first one need credentials to + reply something meaningful + the /proc/self case is a perfect one + (conceptually, it's client -> procfs -> symlink) + 1/ procfs tells the client it needs to handle this specially, + which is what the hurd does with magic + 2/ procfs assumes the identity of the client and the symlink + translator can act as expected because of that + teythoon: what way do you find better ? + braunr: by "procfs assumes the identity" you mean procfs + impersonating the user? 
+ yes + braunr: tbh I still do not see how this can be implemented at + all b/c the /proc/self symlink is not about identity (which can be + derived from the peropen struct initially created by fsys_getroot) but + the pid of the callee (which afaics is nowhere to be found) + s/callee/caller/ + the one doing the rpc + impersonating the user isn't only about identity + actually, it's impersonating the client + yes, client is the term >,< + so basically, asking proc about the properties of the process + being impersonated + proc o_O + it's not hard, it's just a big turn in the way the system would + function + teythoon: ? + you lost me somewhere + the client is the process + not the user + in order to implement /proc/self properly, one has to get the + process id of the process doing the /proc/self lookup, right? + yes + actually, we would even slice it more and have the client be a + thread + so how do you get to that piece of information at all? + the server inherits a special port designating the client, which + allows it to query proc about its properties, and assume it's identity in + servers such as auth + its* + ah, but that kind of functionality isn't there at the moment, is + it? + it's not, by design + right, hence my confusion + instead, servers use the magic translator to send a "retry with + special handling" message to clients + right, so the procfs could bounce that back to the libc handler + that of course knows its pid + yes + right, so now at last I got the whole question :) + :) + ugh, I just found the FS_RETRY_MAGICAL handler in the libc :-/ + ? + why "ugh" ? + well, I'm inclined to think this is the bad kind of magic ;) + do i need to look at the code to understand ? + ok, so I think option 1/ is easily implemented, option 2/ has + consequences that I cannot fully comprehend + same for me + no, but you yourself said that you do not like that kind of + logic being implemented in the libc + well + easily + i'm not so sure + it's easy to code, but i assume checking for magic replies has its + cost + why not? the code is doing a big switch over the retryname + supplied by the server + we could stuff getpid() logic in there + 14:50 < braunr> it's easy to code, but i assume checking for magic + replies has its cost + what kind of cost? computational cost? + yes + the big switch you mentioned + run every time a client gets a reply + (unless i'm mistaken) + a only for RETRY_MAGICAL replies + but you need to test for it + switch (retryname[0]) + { + case '/': + ... + that should compile to a jump table, so the cost of adding + another case should be minimal, no? + yes + but + it's even less than that + the real cost is checking for RETRY_MAGICAL + 14:55 < teythoon> a only for RETRY_MAGICAL replies + so it's basically a if + one if, right ? + no, it's switch'ing over doretry + you should pull up the code and see for yourself. it's in + hurd/lookup-retry.c + ok + well no, that's not what i'm looking for + it's not o_O + i'm looking for what triggers the call to lookup_retry + teythoon: hm ok, it's for lookups only, that's decent + teythoon: 1/ has the least security implications + yes + it could slightly be improved with e.g. a well defined interface + so a user could preload a library to extend it + extend the whole magic lookup thing? + yes + but that is no immediate concern, you are trying to fix + /proc/self, right? 
+ no, i'm thinking about the big picture for x15/propel, keeping the + current design or doing something else + oh, okay + solving /proc/self looks actually very easy + well, I'd say this depends a lot on your trust model then + do you consider servers trusted? + (btw, will there be mutual authentication of clients/servers in + propel?) + there were very interesting discussions about that during the + l4hurd project + iirc, shapiro insisted that using a server without trusting it + (and there were specific terminology about trusting/relying/etc..) is + nonsense + teythoon: i haven't thought too much about that yet, for now it's + supposed to be similar to what the hurd does + hm, then again trust is not an on/off thing imho + ? + trusting someone to impersonate yourself is a very high level of + trust + s/is/requires/ + the mobile code paper suggests that mutual authentication might + be a good thing, and I tend to agree + i'll have to read that again + teythoon: for now (well, when i have time to work on it again + .. :)) + i'm focusing on the low level stuff, in a way that won't disturb + such high level features + teythoon: have you found something related to a thread-specific + port in the proc server ? + hurd/process.defs:297: /* You are not expected to understand + this. */ + \o/ + braunr: no, why would I (the thread related question) + braunr: yes, that comment also cought my eye :/ + teythoon: because you read a lot of the proc code lately + so maybe your view of it is better detailed than mine + + +## IRC, freenode, #hurd, 2013-09-13 + + * youpi crosses fingers + yay, still boots + teythoon: I'm getting a few spurious entries in /proc/mounts + none /servers/socket/26 /hurd/pfinet interface=/dev/eth0, etc. + /dev/ttyp0 /dev/ttyp0 /hurd/term name,/dev/ptyp0,type,pty-master 0 + 0 + /dev/sd1 /dev/cons ext2fs + writable,no-atime,no-inherit-dir-group,store-type=typed 0 0 + fortunately mount drops most of them + but not /dev/cons + spurious entries in df are getting more and more common on linux + too anyway... + ah, after a console restart, I don't have it any more + I'm getting df: `/dev/cons': Operation not supported instead + + +## IRC, freenode, #hurd, 2013-09-16 + + teythoon: e2fsck does not seem to be seeing that a given filesystem + is mounted + /dev/sd0s1 on /boot type ext2 (rw,no-inherit-dir-group) + and still # e2fsck -C 0 /dev/sd0s1 + e2fsck 1.42.8 (20-Jun-2013) + /dev/sd0s1 was not cleanly unmounted, check forced. + (yes, both /etc/mtab and /run/mtab point to /proc/mounts) + Yes, that is a "known" problem. + tschwinge: no, it's supposed to be fixed by the mtab translator :) + youpi: glibc's paths.h points to /var/run/mtab (for us) + youpi: Oh. But this is by means of mtab presence, and not by + proper locking? (Which is at least something, of course!) + /var/run points to /run + tschwinge: yes + anyway, got to run + + +## IRC, freenode, #hurd, 2013-09-20 + + teythoon: how come i see three mtab translators running ? + 6 now oO + looks like df -h spawns a few every time + yes, weird... 
+ accessing /proc/mounts does actually + teythoon: more bug fixing for you :) + + +## IRC, freenode, #hurd, 2013-09-23 + + so it might be a problem with either libnetfs (which afaics has + never supported passive translator records before) or procfs, but tbh I + haven't investigated this yet diff --git a/community/gsoc/project_ideas/object_lookups.mdwn b/community/gsoc/project_ideas/object_lookups.mdwn index 5075f783..88ffc633 100644 --- a/community/gsoc/project_ideas/object_lookups.mdwn +++ b/community/gsoc/project_ideas/object_lookups.mdwn @@ -40,3 +40,32 @@ accurate measurements in a system that lacks modern profiling tools would also be helpful. Possible mentors: Richard Braun + + +# IRC, freenode, #hurd, 2013-09-18 + +In context of [[!message-id "20130918081345.GA13789@dalaran.sceen.net"]]. + + braunr: (wrt the gnumach HACK) funny, I was thinking about doind + the same for userspace servers, renaming ports to the address of the + associated object, saving the need for the hash table... + teythoon: see + http://darnassus.sceen.net/~hurd-web/community/gsoc/project_ideas/object_lookups/ + teythoon: my idea is to allow servers to set a label per port, + obtained at mesage recv time + because, yes, looking up an object twice is ridiculous + you normally still want port names to be close to 0 because it + allows some data structure optimizations + braunr: yes, I feared that ports should normally be smallish + integers and contigious at best + braunr: interesting that you say there that libihash suffers + from high collision rates + I've a theory to why that is, libihash doesn't do any hashing at + all + there are notes about that in the open_issues section of the + wiki + but I figured that this is probably ok for port names, as they + are small and contigious + braunr: That's called protected payload. + braunr: The idea is that the kernel appends data to the message in + flight. diff --git a/community/gsoc/project_ideas/sound/discussion.mdwn b/community/gsoc/project_ideas/sound/discussion.mdwn new file mode 100644 index 00000000..4a95eb62 --- /dev/null +++ b/community/gsoc/project_ideas/sound/discussion.mdwn @@ -0,0 +1,47 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!taglink open_issue_documentation]]: update [[sound]] page. + + +# IRC, freenode, #hurd, 2013-09-01 + + I'm new to the hurd but I'd love to learn enough to work on sound + support. + + http://darnassus.sceen.net/~hurd-web/community/gsoc/project_ideas/sound/ + says drivers should be ported to GNU Mach as a first step. + Is this information still current or should the existing Linux + driver be wrapped with DDE instead? + if i recall correctly dde is currently only being used for + network drivers. i'm not sure how much work would be involved for sound + or usb + + +## IRC, freenode, #hurd, 2013-09-02 + + The sound support proposal + (http://darnassus.sceen.net/~hurd-web/community/gsoc/project_ideas/sound/) + recommends porting some other kernel's sound driver to GNU Mach. 
Is this + still current or should DDE be used instead? + rekado: dde or anything userspace-based is generally preferred + rekado: both are about porting some other kernel's sound driver + dde is preferred yes + This email says that sound drivers are already partly working with + DDE: http://os.inf.tu-dresden.de/pipermail/l4-hackers/2009/004291.html + So, should I just try to get some ALSA kernel parts to compile + with DDE? + well, what is missing is also the dde←→hurd glue + rekado: there is also a problem with pci arbitration + pinotree: I assumed DDEKit works with the hurd and we could use + any DDE/ glue code with it + * rekado looks up pci arbitration + only for networking atm + ah, I see. diff --git a/contributing.mdwn b/contributing.mdwn index b5ff6f3c..67df9d55 100644 --- a/contributing.mdwn +++ b/contributing.mdwn @@ -86,6 +86,8 @@ taken the time to fix it yet, but it shouldn't be very hard. The code begins at `hurd/pfinet/ethernet.c`, `ethernet_open()`, the `device_open` call, which produces `edev->ether_port`. Basically, one needs to catch errors like EIEIO when using it, and in that case re-open the device. +See also the notes on [[hurd/translator/pfinet/implementation]], *Bugs*, *IRC, +freenode, #hurd, 2013-09-03*. * Add a futex kernel trap to GNU Mach. This can be useful for nicer locking primitives, including inter-process primitives. `vm_allocate` can be used as an example in the `gnumach` source tree for how to add a kernel trap. [[!GNU_Savannah_task 6231]] @@ -106,6 +108,7 @@ part:1:file:/home/samy/tmp/foo`). This would be libnetfs-based. [[GSoC proposal|community/gsoc/project_ideas/valgrind ]] about this, but the basic port could be small. * Use libz and libbz2 in exec. See `hurd/exec/unzip.c` etc., they should be replaced by mere calls to libraries, [[!GNU_Savannah_task 6990]] +See also the discussions on [[open_issues/exec]]. * Add `/proc/$pid/maps`. `vminfo` already has this kind of information, it's a matter of making procfs do the same. [[!GNU_Savannah_bug 32770]] diff --git a/contributing/discussion.mdwn b/contributing/discussion.mdwn index 5a6bfd7c..11e8ac0e 100644 --- a/contributing/discussion.mdwn +++ b/contributing/discussion.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -19,3 +19,69 @@ Invent something. # Mailing Lists Add link to [[mailing_lists]] to page, and suggest following these. + + +# IRC, freenode, #hurd, 2013-08-05 + + hi guys, I'm new here. I'm a developer from Guile community, + and I think maybe it's a proper time to do some work to make GNU stuff + use Guile increasingly, but I found the wiki and docs seems a bit old, + and I can't find an entry from Hurd source, since there're too many + things. Anyone point me out? + thanks + nalaginrut what exactly is it that you need help with? + I've no idea, I saw MIG and I think if it's a language I can + write a front-end on Guile platform. 
But someone suggest me write hurd + binding will be a good start + I cloned incubator which is cl-binding for hurd, but I've no + idea too, since there's nothing in master branch + well, fixing guile on the hurd would be a start: + https://buildd.debian.org/status/package.php?p=guile-2.0 + i won't talk about this, as my personal opinion on the matter is + that it's not a proper time to do it + but at the same time, people should do what they're interested in + so feel free to do it + braunr: is there any reason why it's not a proper time? + nalaginrut: two words: mig sucks + so it'll be replaced by a new stuff? + any more reasons to have alternatives, no? + sure, please do it :) + actually it's more than just mig + the low level internals of the hurd are almost fine, but not good + enough to reliably develop over it + gccgo is currently proving it + and such projects are good opportunities to identify and fix such + issues + but the, if you want to work on guile, be prepared to work on a + lot more than just guile + I'm afraid I have to collect the reasons and evaluate when is + proper to do that, if Hurd has to be redesigned, it is not a proper time + ;-) + it also happened with openjdk, jeremie had to fix signals (!) + anyway, I just want a suggestion how to start + well, fixing guile on the hurd would be a start: + https://buildd.debian.org/status/package.php?p=guile-2.0 + ok, I'll try + nalaginrut: "incubator" is a somewhat strange beast. every branch + in there is a completely different project. you have to find the right + branch for the CL bindings... + antrik: thanks for reply, I guess it's clisp branch? + nalaginrut: + http://www.gnu.org/software/hurd/source_repositories/incubator.html + nalaginrut: sounds like it :-) + braunr: I'm believe it's important to encourage work on as many + different levels as possible. there is no motivation for fixing low-level + issues unless there are some interesting high-level things relying on + these... + antrik: i agree + 11:50 < braunr> but at the same time, people should do what + they're interested in + in fact, it's pretty much impossible to identify what we really + need at the lower levels unless working on high-level stuff as well... + yes + 11:57 < braunr> but the, if you want to work on guile, be prepared + to work on a lot more than just guile + I prepare to work on Hurd, is that an fair answer? + nalaginrut: perfect! ;-) + ;-) well, easy to say, but I'll try what I can do + yeah, just see how far you get. might be an interesting ride :-) diff --git a/faq/still_useful.mdwn b/faq/still_useful.mdwn index 8d7e3f28..d08d2df7 100644 --- a/faq/still_useful.mdwn +++ b/faq/still_useful.mdwn @@ -68,6 +68,6 @@ various servers are designed for this sort of modification. > drivers are actually Linux drivers running in a separate userland process. > It also for instance provides very fine-grain virtualization support, such as -> VPN for only one process, etc. +> [[VPN for only one process|open_issues/virtualization/networking]], etc. > etc. etc. The implications are really very diverse... diff --git a/faq/system_port.mdwn b/faq/system_port.mdwn index fc710a3e..ca96697c 100644 --- a/faq/system_port.mdwn +++ b/faq/system_port.mdwn @@ -47,3 +47,27 @@ Mach run as a POSIX user-space process|open_issues/mach_on_top_of_posix]], or by implementing the [[Mach IPC|microkernel/mach/ipc]] facility (as well as several others) as Linux kernel modules. While there have been some experiments, no such port has been completed yet. 
+ + +# IRC, freenode, #hurd, 2013-09-05 + + what would be required to port the hurd to sparc? + port gnumach, write the sparc bits of mach/hurd in glibc, and + maybe some small parts in hurd itself too + what would be required to port gnumach? :-) + a new arch/ directory + bootstrap code + pmap (mmu handling) code + trap handling + basic device support (timers for example) + besides, sparc is a weird beast + so expect to need to work around tricky issues + in addition, sparc is dead + mmm + it's not totally dead + the T1 chips and their decendents are still in production + the thing is I'd like to have real hardware for the hurd + and if I'm going to have two machines running at once, I'd rather one + of them was my UltraSPARC box :-) + rah: unless you work hard on it, it's unlikely you'll get it + braunr: of course diff --git a/glibc/signal/signal_thread.mdwn b/glibc/signal/signal_thread.mdwn index c6e8d69e..544d387d 100644 --- a/glibc/signal/signal_thread.mdwn +++ b/glibc/signal/signal_thread.mdwn @@ -13,12 +13,11 @@ invoker of `kill` to the target process. The target process' [[signal_thread]] job is it to listen to such messages and to set up signal handler contexts in other threads. ---- - -[[!tag open_issue_documentation]] # IRC, freenode, #hurd, 2011-04-20 +[[!tag open_issue_documentation]] + bugs around signals are very tricky signals are actually the most hairy part of the hurd and the reason they're aynchronous is that they're handled by a @@ -50,3 +49,43 @@ other threads. mach and the hurd were intended to be "hyperthreaded" [[open_issues/multithreading]]. + + +# IRC, freenode, #hurd, 2013-09-17 + + I just realized that I know next to nothing about signal + handling on the Hurd... + especially /hurd/inits role in it + reading glibcs kill.c it does not involve /hurd/init at all, but + /hurd/init is full of proxying code for the msg protocol + ah, /hurd/init mitms the signal handling logic in the libc for + its own signals + for msg_sig_post it sends a reply immediately, and then + processes the signal, I wonder why that is done + also it "forwards" any signals it receives to the child it + spawned (like /etc/hurd/runsystem), I wonder why... + good thing the comments tell what is done, not why... + so in theory kill -HUP 1 should have been forwarded to the + "runsystem" process, I wonder why that does not work if that one execs + sysvinit + teythoon: can't help you there :/ + braunr: I think I sorted it out on my own, we'll see how that + works out in practice ;) + good + + +## IRC, freenode, #hurd, 2013-09-18 + + braunr: I figured out why /hurd/init does this strange thing + with the msg protocol + braunr: it has no signal thread + I wonder how /hurd/exec and the initial filesystem handle + this... + err, afaics the signal thread is created in fork(), so any + process not created using it (ie manually using task_create) should lack + the signal thread, no? + that'd be the root fs, /hurd/{exec,init,auth,proc} and + /etc/hurd/runsystem (the child started by /hurd/init) + but I see only /hurd/init doing something about it, namely + setting a msgport and handling the msg protocol, relaying any messages to + the signal handling logic in the glibc diff --git a/hurd/debugging/rpctrace.mdwn b/hurd/debugging/rpctrace.mdwn index 1570df4c..d62a4387 100644 --- a/hurd/debugging/rpctrace.mdwn +++ b/hurd/debugging/rpctrace.mdwn @@ -16,6 +16,17 @@ doing. See `rpctrace --help` about how to use it. 
+# IRC, freenode, #hurd, 2013-07-29 + + about rpctrace, it poses as the kernel for its children, parses + and relays any messages sent over the childrens message port, right? + teythoon: rpctrace doesn't "poses as the kernel" + well, it's close enough + but it intercepts messages send by its children by handing them + a message port different from the one provided by the kernel, doesn't it? + yes + + # Issues and Patches [[!tag open_issue_hurd]] @@ -186,6 +197,34 @@ See `rpctrace --help` about how to use it. wish: make rpctrace decode the results of io_stat rpcs +* IRC, freenode, #hurd, 2013-07-29 + + imho rpctrace is kind of a mess right now :-/ we should move the + parsing code to a library + that would also be useful for valgrind, it should have to do + basically the same + +* IRC, freenode, #hurd, 2013-07-29 + + and I tried to rpctrace a subhurd, but rpctrace died on a + assertion failure, some msg had an unexpected type or something + rpctrace dies on select + and guess what, the boot tool does call select on the console it + emulates + that's a shame, that'd be really useful for me + it might not be hard to fix + but i've never looked into it :/ + i only saw that rpctrace expects the common RPC message types + and select is all but a common RPC + so the type of the messages involved is slightly different + and the assertion chokes on that + rpctrace.c is huge and hand written, it'd be nice if the parser + was created from the procedure definitions + and thinking of that, mig does exactly that, one would only need + some glue code + select is partially hand written + but it's a special case so that's ok + # See Also diff --git a/hurd/libstore.mdwn b/hurd/libstore.mdwn index 8eac39fe..b2e7f7a9 100644 --- a/hurd/libstore.mdwn +++ b/hurd/libstore.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2007, 2008, 2009 Free Software Foundation, +[[!meta copyright="Copyright © 2007, 2008, 2009, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable @@ -6,8 +6,8 @@ id="license" text="Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license -is included in the section entitled -[[GNU Free Documentation License|/fdl]]."]]"""]] +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] `libstore` is used to provide a generic interface to access data (read/write) on backing stores. @@ -15,6 +15,8 @@ on backing stores. It more than just a thin layer between [[GNU Mach|microkernel/mach/gnumach]] devices (`hd0` for example) and the device node below `/dev/`... +[[!toc]] + # Available Stores @@ -34,3 +36,32 @@ can be found. pages="hurd/libstore/examples/* and !*/discussion" show=0 feeds=no]] + + +# Open Issues + +## IRC, freenode, #hurd, 2013-07-29 + +[[!tag open_issue_documentation open_issue_hurd]] + + and I read hammys paper about mobile code, is it true that the + store code is loaded into the client? who is the server and who is the + client in this context? + teythoon: "store code" ? + libstore + the hurd libstore ? + yes + hum, what paper ? 
+ braunr: + http://users.student.lth.se/cs07fh9/2009-hammar-hurd-mobility.pdf + how nice + braunr: http://www.gnu.org/software/hurd/news/2010-01-31.html + it raises an important point btw, the authentication done by + processes on the Hurd is one sided, only the client authenticates at the + server + yes + It'S also mentioned in + http://www.gnu.org/software/hurd/hurd/documentation.html -- but of + course, any results he got from his work really should be integrated more + properly into the existing body of documents. + As with so many other documents/discussions/etc. ;-| diff --git a/hurd/libstore/part.mdwn b/hurd/libstore/part.mdwn index 5260d05d..29ef9072 100644 --- a/hurd/libstore/part.mdwn +++ b/hurd/libstore/part.mdwn @@ -1,4 +1,5 @@ -[[!meta copyright="Copyright © 2010, 2012 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2012, 2013 Free Software Foundation, +Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -28,6 +29,132 @@ A similar problem is described in [[community/gsoc/project_ideas/unionfs_boot]], and needs to be implemented. -# TODO +# Open Issues -How to use, etc. +## Documentation + +[[!tag open_issue_documentation]] + +## [[open_issues/hurd_build_without_parted]] + +## IRC, freenode, #hurd. 2013-09-21 + + Hello, guys. Is there a way to know where partition starts on + hurd. E.g. given hd0s1 get "2048 sectors" + yes, it's the storeinfo RPC + let me find you a pointer + in GRUB 2 files for determining device relations are a mess of + #if's. I try to split it into logical files and make common logic + uniform. Current Hurd's logic is completely different and, actually, + wrong. Same logic is used by Mac OS X part ... + phcoder: Mmm, I guess you never got the userland-part.patch + upstream + ah, yes ,you did + I mean the find_hurd_root_device function + grub was previously using file_get_storage_info + youpi: find_hurd_root_Device/file_get_storage info is about + translating / -> /dev/hd0s1. Current problem is in step hd0s1 -> + hd0,msdos1 + yes, but iirc file_get_storage_info might work for hd0s1 itself + I see, let me try this + youpi: file_get_storage gives offset=0 size=partition size + (file_get_storage) damn + and name=hd0s1 + ah, that might be because we're still using in-kernel partition + table, instead of the parted partition table + looks like file_get_storage would be useful to get block size + though + youpi: is parted already used in some cases? Any reliable way to + check for it? Any way to access kernel partition map? Ioctl? RPC to + kernel? + the parted table is only enabled in the debian installer for + now. You can set up one for yourself by running e.g. settrans -c + /tmp/myhd0s1 /hurd/storeio -T typed part:1:device:hd0 + I don't think there is any ioctl/RPC to get the kernel partition + table + youpi: is it using Linux partition code with some glue? + phcoder: the kernel partition table, yes + youpi: that's bad. it's probably one of the least consistent + numbering schemes. It would imply that it only worked because only + simplest cases were ever tested + I know + that's why we want to migrate to the parted-based partition table + support + (which also brings us much better support than the old linux2.0 + code :) ) + youpi: I've looked into code and must say that I dislike what I + see: partitions handled in ide/ahci/sd/... + phcoder: which code? 
+ youpi: gnumach + sure, that's not what we want in the end + grep -r start_sect + it's just the legacy linux way of doing partition support + Well Linux at least gives a meaningful ioctl + couldn't find any hint of it in gnumach + we didn't bother to add one since the parted way is supposed to be + what we have in the end + youpi: I can't make our code follow sth that might be the case in + the future + why not? + that's the way we will go + it's not just hypothetic + we just can't continue maintaining disk drivers in the kernel + so it won't be in the kernel + youpi: if I do then GRUB won't work on current GNU/Hurd anymore + can't you also keep the old code? + as a fallback when the proper way does not work (yet) + More hairs... :( + How do I check for it? offset == 0 isn't proper as partitions may + start at 0 + but checking than name still refers to partition is probably the + right way + I don't see what you mean + (about name) + youpi: I mean that we need a way to know that current code is + used and not future parted-based code + phcoder: I understand that for the offset ==0 thing + but I didn't understand the phrase you wrote just after that + youpi: file_get_storage gives back a name. If this name is the + same as the partition we requested in the first place then it's current + code + ah, ok + yes, if the name is the same, it means it's not actually a + partition + youpi: current gnumach code makes fake devices out of partitions + yes + youpi: with settrans command you told, I get num_ints = 0 + phcoder: odd, I do get information, e.g.: + hurd:/tmp# settrans -c /tmp/mysd0s1 /hurd/storeio -T typed + part:1:device:sd0 + hurd:/tmp# storeinfo mysd0s1 + device (0x200): sd0: 512: 83905: 42959360: 63+83905 + storeinfo: myhd0s1: Operation not supported + do you actually have an hd0 device? + yes + youpi: I typed parted instead of part + Now it works + good :) + youpi: what is expected timeline on migration to part interface? + there's no real timeline + like everything, it'll happen when somebody actually looks at how + to achieve it + perhaps it'll be easy, perhaps not. IIRC there is still an issue + with the swapper + youpi: sounds like we're stuck will fallback code for at least + couple of years + possibly, entirely depends on people taking the task + if that becomes really pressing at some point, I'll have to do it, + but of course, I can not magically do everything in a glimpse + youpi: it's not pressing but just be aware that unusual + partitioning is likely to fail. Probably not huge issue. As to its place + in our code it's not ideal but it's not the only case of suboptimal + construction for specific systems (what we had to do because of Linux + caching is terrifying). I'm not going to make hurd code a scapegot of + more generic problem + youpi: and since we very rarely drop support this code is + probably stuck for good + as long as it's not used whenever we get to move to parted-based + partitioning, it's not too bad + youpi: and Mac OS X/Darwin case is even worse. Apparently they + deprecated their *BSD functions (which probably don't work since they + don't use BSD labels) without giving any replacement. 
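The storeinfo output quoted above (`device (0x200): sd0: 512: 83905: 42959360: 63+83905`) lists the store's name, block size, number of blocks, size in bytes (83905 × 512 = 42959360) and its runs as start+length. That is the same data `file_get_storage_info` returns, and libstore parses it into a `struct store`. A minimal sketch of reading it programmatically, assuming the interface declared in `<hurd/store.h>` (`store_create`, `store_free`, and the `block_size`, `blocks`, `size`, `runs`/`num_runs` fields) and linking with `-lstore`:

    /* Sketch: print where a store (e.g. a partition such as hd0s1 or a
       storeio node) starts and how large it is, in blocks.
       Build with: gcc showstore.c -o showstore -lstore  */
    #include <hurd.h>
    #include <hurd/store.h>
    #include <errno.h>
    #include <error.h>
    #include <fcntl.h>
    #include <stdio.h>

    int
    main (int argc, char **argv)
    {
      if (argc != 2)
        error (1, 0, "usage: %s DEVICE-FILE", argv[0]);

      file_t node = file_name_lookup (argv[1], O_RDONLY, 0);
      if (node == MACH_PORT_NULL)
        error (1, errno, "%s", argv[1]);

      struct store *store;
      error_t err = store_create (node, 0, NULL, &store);
      if (err)
        error (1, err, "store_create");

      /* Roughly the information storeinfo prints:
         name: block size: blocks: bytes: runs.  */
      printf ("%s: block size %zu, %lld blocks, %lld bytes\n",
              store->name ? store->name : argv[1],
              store->block_size,
              (long long) store->blocks, (long long) store->size);
      for (size_t i = 0; i < store->num_runs; i++)
        printf ("  run %zu: %lld+%lld (start + length, in blocks)\n",
                i, (long long) store->runs[i].start,
                (long long) store->runs[i].length);

      store_free (store);
      return 0;
    }

Run against a storeio node set up as in the transcript (`settrans -c mysd0s1 /hurd/storeio -T typed part:1:device:sd0`), the first run's start is the partition's offset on the disk, which is the figure GRUB is after.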
diff --git a/hurd/running/debian/dhcp.mdwn b/hurd/running/debian/dhcp.mdwn index afa46799..849ff382 100644 --- a/hurd/running/debian/dhcp.mdwn +++ b/hurd/running/debian/dhcp.mdwn @@ -1,4 +1,5 @@ -[[!meta copyright="Copyright © 2011, 2012 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2012, 2013 Free Software Foundation, +Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -29,3 +30,97 @@ scripts, but has its own `/libexec/rc` script -- which integrates scripts from * [[!debbug 616290]] * [[Proper Hurdy DHCP support|hurd/translator/pfinet/dhcp]] + + * [[!message-id desc="dhclient aborting with a stack smashing error" + "874ngfvwn4.fsf@kepler.schwinge.homeip.net"]] + + IRC, freenode, #hurd, 2013-08-21: + + yay, I fixed the path of the dhcp leases file... + ... and now dhclient dies of a buffer overflow + fortunately the fix is rather simple, anyone who cares about + the security of his box just has to stop using isc software + the code is full of stuff like char foo[100]; /* surely + that's enough */ + note that our version of isc-dchp (the one in ports) is + older than the latest one available in unstable (which is still older + than the latest upstream releases) + so? + dunno, might have been fixed or not + ^^ yeah sure + A lot of software has these limitations and PATH_MAX, + MAXPATHLEN issues :( + having a limitation is not a problem per-se + no, only software written in c has these kind of problems + the problem is not checking whether the limits are hit + well, looking at the source of isc-dhcp my time is better + spent making another dhcp client work on hurd + also reading up on bug #616290 does make me want to avoid + touching it ever + hehe + teythoon: somebody was offering an alternative to the isc + dhcpclient, but I think it was rejected by Samuel? + why would he do that? + probably for compliance + He probably thought they would release a new version soon, is + 4.3.0 out yet? + well, as soon as my fixes for ifupdown go in, dhclient will + start crashing + no, there is no new version released + no major one that is + 4.2.5 is out + can't you just increase the buffer size, where is the problem + exactly? + I have no idea + The Hurd patches are not in 4.2.5, they were promised for + 4.3.0a1. + Still the buffer overflow problem might be present in 4.2.5 + if patched to build on Hurd. + there, darnassus now has a fully featured git/gitweb service + :) + btw, I managed to reproduce the crash reliably + rm /var/lib/dhcp/*; dhclient -v /dev/eth0 ... *boom* + ditch the -v, everything works, and now that there is a + lease file, you can add the -v again and it works + ew :) + and what has dhclient.c to say for its defense? + log_info("%s", ""); + hm, not much :/ + + IRC, freenode, #hurd, 2013-08-22: + + uh, the isc-dhcp situation is a huge pita, the source on + -ports does not compile anymore :/ + + IRC, freenode, #hurd, 2013-08-23: + + teythoon: Was it the slash in the network interface names + that caused the buffer overflow in dhclient? + gnu_srs: no, previously no dhcp leases file was written and + everything was fine + teythoon: did you really develop your patch against that old + version of ifupdown? + gnu_srs: now it is written, and for some reason dhclient + crashes *iff* -v is given *and* there is no previous lease file + pinotree: no, I did not. 
that was only reportbug including + information from my desktop machine without asking me + but when I first looked at ifupdown it was still a 6000 + lines noweb file >,< + that was fun + which version is it against? + hg tip + + IRC, freenode, #hurd, 2013-08-30: + + teythoon: I understand correctly that you found that + id:"874ngfvwn4.fsf@kepler.schwinge.homeip.net" in fact was really + "just" a buffer overflow in the dhclient code? + tschwinge: ah, most interesting, I didn't realize that you + stumbled across this as well + to be honest I don't know what's going on there, I only + observed what I wrote in my report + for me it started crashing once the lease file was actually + a valid path (i.e. not to a non-existing directory b/c of the slashes + in /dev/eth0) + I tried to rebuild the package served on debian-ports, but + that failed diff --git a/hurd/subhurd.mdwn b/hurd/subhurd.mdwn index f2117ead..55bead37 100644 --- a/hurd/subhurd.mdwn +++ b/hurd/subhurd.mdwn @@ -1,5 +1,5 @@ -[[!meta copyright="Copyright © 2007, 2008, 2010, 2011 Free Software Foundation, -Inc."]] +[[!meta copyright="Copyright © 2007, 2008, 2010, 2011, 2013 Free Software +Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -42,6 +42,22 @@ set up another Hurd on a different partition, without ever rebooting. (You can run the `native-install` step from a chroot or already in a subhurd.) +### IRC, freenode, #hurd, 2013-09-15 + + Never dared to try a subhurd, any link to the howto? + gnu_srs: I followed + http://www.gnu.org/software/hurd/hurd/subhurd.html though using crosshurd + didn't work for me, I just used debootstrap + gnu_srs: and you need a separate filesystem translator (i.e. not + /) for that + the easiest way is to add another virtual disk to you qemu setup + use the qemu image directly + simplest way to set up a subhurd + just change fstab from the host before the first boot to avoid + making the subhurd use the same hd0 drive as the host + braunr: nice idea :) + + ## Booting To boot the subhurd, you need a boot script. For historical reasons, usually @@ -118,6 +134,362 @@ look at the number of threads (e.g. using `ps -l`), as many servers have very characteristic thread counts. +### IRC, freenode, #hurd, 2013-08-09 + + btw, is there a way to get dde-based networking into a subhurd? + the wiki instructions look like they're for the mach driver + and starting the dde translator inside the subhurd does not work + for me + that's probably a good thing though + the netdde process will need privileged access to mach + for hardware access + you can't easily use netdde from a subhurd, unless with a + different nic + i usually rebuild mach with in kernel devices so both the main and + subhurd can share on nic + one* + could a port to netdde perhaps forwarded to the subhurd? + zengh da wrote the eth-multiplexer for that iirc + it's a matter of making it appear as an eth0 device on the master + port aiui + zheng* + yes, I looked at that + what is the master port? + on a plain hurd system it's the port that privileged processes can + use to access mach devices + in a subhurd, it's the same for the subhurd, to access some devices + that you choose to give access to + its real name is the "device master port" + ah yes + + +#### IRC, freenode, #hurd, 2013-08-10 + + teythoon: use eth-multiplexer to use the NIC within a + subhurd. that's exactly what it was created for. 
+ I don't remember whether it's even possible to share a "raw" + netdde device... I don't think I ever tried that; and I don't remember + enough of the theory to tell whether it should be possible + but I really don't see the reason to, when eth-multiplexer is + available + (IMHO running an eth-multiplexer on top of netdde should be the + default setup in fact) + as for actually passing on the device, that should be perfectly + possible with zhengda's modified subhurd... but I don't remember whether + that was ever merged upstream + (you will definitely need that for using netdde in a subhurd, + regardless whether through eth-multiplexer or directly) + + +#### IRC, freenode, #hurd, 2013-09-15 + + I wonder if we can modify the boot program so that it passes + ports from the mother hurd to the subhurd + so that we could pass in a port to the eth-multiplexer + or use like /hurd/remap as the root translator for the subhurd + eth-multiplexer was created exactly for that iirc, + so it's probably already done somewhere + + +#### IRC, freenode, #hurd, 2013-09-16 + + braunr: regarding subhurd did you mean to install + sthibault/hurd-i386/debian-hurd.img.tar.gz + on a separate partition and booting using the instructions for + subhurds on the web. + gnu_srs: yes + be careful that the subhurd doesn't use the same partition as the + main hurd, that's all + what about changing fstab? + 12:17 < braunr> be careful that the subhurd doesn't use the same + partition as the main hurd, that's all + gnu_srs: yes, you need to change the fstab + currently it is used for fscking stuff, so if it points to your + main partition it will cause severe corruption + gnu_srs: you also have to specify the right partition in the + servers.boot file + fstab of the subhurd image? + yes + how to unpack the .img file (just to be sure)? + gnu_srs: you don't need to, just use the img file as secondary + hard disk image + Then how should I be able to change fstab of the image? + boot your hurd box, mount the partition and change it + I missed something here: on my partition /my_chroot I have have + the file debian-hurd-20130504.img + gnu_srs: ah, you copied it to the partition, braunr meant to use + it as the secondary disk, e.g. qemu ... -hdb debian-hurd-20130504.img ... + That is the same as installing another cd image, where does the + subhurd come into play? + mount the partition on the secondary hd, fix the fstab there, + mount it r/o, get the servers.boot file from the wiki, modify it so that + it points to the right partition, execute boot servers.boot /dev/, probably /dev/hd1s1 + BTW: unpacking was problematic: tar: debian-hurd-20130504.img: + Cannot seek to 2147696640 (2G limitations) + I wonder why you did this on your hurd system in the first + place... + I thought I could use that partition, /my_chroot as a chroot + place. So it won't work for subhurds? + well, there are several ways to setup a subhurd. one is to + already have a spare partition for that and use crosshurd or as I did + debootstrap to install a debian system there + braunr suggested an even easier way, download the .img file and + use it as secondary hard disk + you ended up doing kind of both + I tried once with debootstrap and that created a disaster... + how so? + The install errored out, and the whole filesystem (including /) + was left in a broken state. Maybe I tried + that without using a separate partition. Don't remember any + longer. So you say it's safe now? 
+ I used it successfully to setup my subhurd + and you have your subhurd in a separate partition, installed from + there too, as root? + the web page only mentions crosshurd, and that failed for you? + yes, having a separate partition is (currently) necessary to run + a subhurd + yes, I used debootstrap as root, afaics that is necessary + and yes, as I said the other day, I tried crosshurd first and it + failed + then again, I fail to see any reason to use crosshurd these days + it's only a wrapper around debootstrap anyway, using it with + --foreign and fixing up stuff later + one has more control over the process if one uses debootstrap + directly + I still don't dare to do it yet. I'll create another image using + netinst with a separate partition and try out first. + When installing a new image using netinst.iso (2013-06-30) and + rebooting /proc does not get mounted? + gnu_srs: is that a statement or a question? + A statement. + it's not customary to end statements with question marks ;) + s/mounted?/mounted, why?/ + well, you seem to be the last person to perform such an + installation, so you are in the perfect position to answer this question. + cat /var/log/dmesg? + On other images I have: fsysopts /proc; /hurd/procfs + --clk-tck=100 --stat-mode=444 --fake-self=1 + gnu_srs: no, check the installation log + gnu_srs: and what does showtrans say? + showtrans /proc; + which log file to look for? + the installation log, somewhere in /var/log probably + I only find /proc in /var/log/installer/syslog, mainly printing + out errors not finding /proc/mounts + iirc the /proc translator should be set during the hurd package + configuration + you should probably look for that part in the log + Setting up translators: /hurd/exec /hurd/proxy-defpager + /hurd/pflocal (+link) /hurd/pfinet (+link) (+link) /hurd/procfs -c + /hurd/password crash-kill crash-suspend crash-dump-core crash. + that part + debootstrap: /hurd/procfs -c and in-target: /hurd/procfs -c No + errors + I don't understand what that means + please explain in more details + see: http://paste.debian.net/41195/ + makes much more sense :) + Where is the 'Setting up translators' done? I cannot find + anything in /var/lib/dpkg/info/hurd* or /etc/init.d/... + /usr/lib/hurd/setup-translators, called in hurd.postinst + tks:) + Hi, when installing a new image with debootstrap to /chroot the + script boot/servers.boot is already there (as well as in /boot/ + grub) + Is it OK to use that file to boot the subhurd? + using /boot/servers.boot or /chroot/boot/servers.boot (if the + /chroot partition is unmounted it cannot be used?) + and how to unmount /chroot: umount does not work? + braunr: I'm also trying to find out what's wrong with glibc, when + my subhurd is up and running 2.13-39 (if possible) + I know I should issue settrans command, but I'm not yet fluent in + translators. + sorry:-/ + Now this, after a reboot: unknown code P 30 while trying to open + /dev/hd0s3 (/chroot) + Disk write protected: use the -n option to do a read-only check + of the device. + fsysopts /dev/&hd0s1 --writable: Operation not supported?? + OK, I'm giving up for now, no subhurd:-( and a broken install. + Which terminal to use in rescue mode, TERM is not set, + dumb,mach,hurd does not work with nano? + e2fsck /dev/ho0s3; e2fsck: Unknown code P 2 while trying to open + /dev/ho0s3; Possibly non-existent device? + mke2fs /dev/hd0s3; /dev/hd0s3 is not a block special device.; + Proceed anyway? (y,n) n: What's going on (hd0s3 not mounted)?? + anybody, help? 
+ after removing and creating the partition again:mke2fs + /dev/hd0s3, , mke2fs: Unknown code P 13 while trying to determine + filesystem size: What's going on? + Where to find the glibc-2.13 versions which used to be at + debian-ports?. + seems they can be found on snapshot.debian.org + + +#### IRC, freenode, #hurd, 2013-09-17 + + teythoon: Installing subhurd via debootstrap on partition + /chroot fails miserably. Install hangs, and after reboot \rm -r + /chroot/* fails for dev and proc + Are there translators running there already? I have not + booted the subhurd. + translators for hd0s3 (/chroot) are storeio and + ex2fs.static. Do I have to stop them to be able to clean out + /chroot? + mount -v /chroot; settrans -a /chroot /hurd/ext2fs + /dev/hd0s3; + ext2fs: /dev/hd0s3: panic: main: device too small for + superblock (0 bytes); + mount: cannot start translator /hurd/ext2fs: Translator + died + Please, somebody! + don't ask to ask, just ask, right? + we've already told you everything you need + just get it right + for example, i told you to be careful about fstab so that + the subhurd wouldn't use the main hurd partition + but you managed to screw that + good job + I installed the subhurd in a partition /chroot /dev/hd0s3 + using debootstrap + i don't know deboostrap, it may be broken, use the disk + image youpi maintains + ant the install screwed up with debootstrap + ok; then I cannot use a partition, but another disk in + kvm, e.g. hdb? + gnu_srs: hd1 + something is fishy with glibc, definitely, that's why I'm + trying to set up a subhurd to revert to 2.13-39 + hi, when trying to boot a subhurd: /hurd/ext2fs.static: + hd0s3: Gratuitous error; bye + gnu_srs: why hd0s3 ? + it should be hd1s1 + I'm still using a separate partition /my_chroot + /hd0s3. Will switch to hd1 next. teythoon? + the servers.boot script use absolute + paths:/hurd/ext2fs.static and /lib/ld.so.1 /hurd/exec, + shouldn't they be relative to /my_chroot? + no + they're actually from your host + teythoon: please, how did you succeed to boot a subhurd + in a partition? + using debootstrap + gnu_srs: from my shell history: + : 1374672426:0;debootstrap sid /mnt + http://http.debian.net/debian/ + : 1374673020:0;cp /etc/hosts /etc/resolv.conf /mnt/etc + : 1374673048:0;cp /etc/passwd /etc/shadow /mnt/etc + teythoon: so it does work fine ? + great + yes, why wouldn't it? + gnu_srs: I then remounted that partition r/o and used + the servers.boot file from the wiki to boot it + braunr: why wouldn't it? (you do mean the debootstrap + part, don't you?) + teythoon: i don't know + i've heard it wasn't maintained any more + not being maintained is a good reason for something to + become unusable/untrustable with time + o_O it is at the heart of d-i, isn't it? + I actually do most Debian installations using + debootstrap directly + ah + ok :) + teythoon: even hurd ones ? + braunr: well, just the subhurd installation, but that + went as expected + good + Finally: I found the reason for Gratuitous error, I used + the /boot/servers.boot script, + that being different to the one on the wiki:-/ + Is it possible to copy files between a host hurd and + subhurd, what about access to eth0? + Hi, when starting the subhurd I see some warnings/error: + http://paste.debian.net/41963/ + 1) A spelling error execunable-> executable + 2) libports: invalid destination port + 3) mach-defpager: another already running + "execunable" is not a typo, but just "exec" and "unable + ..." 
without a space-type character + OK, sounds more plausible + Ah, the printouts are mixed, no bug + When setting up nework in the subhurd: /hurd/pfinet: + file_name_lookup /dev/eth0: Translator died + /hurd/pfinet: device_open(/dev/eth0): (os/device) no such + device + settrans: /hurd/pfinet: Translator died + + +#### IRC, freenode, #hurd, 2013-09-18 + + priority does not matter much + memory manager is not really surprising, there's indeed already one + what is actually the problem? + So these are merely warnings? + gnu_srs: yes + Real problems are I cannot set up networking, e.g. wget ...: + Connecting to ... failed: Address family not supported by protocol. + gnu_srs: did you give the subhurd a network card? + How? + and do I need to set up fstab, for now it's empty. + I just installed the base with dbootstrap + gnu_srs: -f option of boot + e2fsck will need fstab for sure + otherwise it can't divine what should be checked + Why is the /boot/servers.boot different from the subhurd one on + the wiki? Is it used at all, I thought grub was in charge. + it's not used at all + maybe better to put in the subhurd one there then, with a + comment? + no, since /boot/servers.boot is supposed to be used for machine + boot + not subhurd boot + what about putting a copy of the suhurd one there, with a + different name? + probably a good idea, yes + matter of making it happen + the wiki page on subhurd does not say how to set up networking, + only that you can do it. + matter of adding the information + I remember it's the -f option of boot + make it work, and add the information for others + I could try, but don't know how to add a network card to the + subhurd, and e.g. how to set up swap + see -f option + of boot? + "gnu_srs: -f option of boot" + if you could read what we write, that'd make things happen way + faster + yes I saw your comment above, it was just to be 100% sure:-D + device_file=/dev/eth0 or something else? + eth0 is used by the host already + did you read boot --help ? + iirc it's not a problem, both will receive all frames + yes I did + then I don't see where you took device_file from + at least in that form + --device=device_name=device_file + that means rather something like --device=foo=bar + so -f /dev/eth0 is correct usage then? + didn't you see that in what I wrote, there was a "=" in there? + -f is the short option, --device is the long, I don't see the + need for = in the short option? + in the long option there are *two* = + yes, but in the short no? + why not? + long -> short usually drops one = + to summarize: -f=/dev/eth0 or --device=eth_sub=/dev/eth0? + why shouldn't there be a eth_sub in the short version? + 10:15:49) youpi: long -> short usually drops one = + yes, it drops the = + but nothing else + if the long option needs some information, the short needs it too? + -? + correct now? -f eth_sub=/dev/eth0 or --device=eth_sub=/dev/eth0? + yes + k! + + # Further Info Read about using a subhurd for [[debugging_purposes|debugging/subhurd]]. diff --git a/hurd/translator.mdwn b/hurd/translator.mdwn index 37dac794..da141dc2 100644 --- a/hurd/translator.mdwn +++ b/hurd/translator.mdwn @@ -92,16 +92,19 @@ The [[concept|concepts]] of translators creates its own problems, too: * [[exec]] * [[proc]] * [[pfinet]] +* [[eth-filter]] * [[pflocal]] * [[hostmux]] * [[storeio]] * [[ext2fs]] * [[fatfs]] +* [[ufs]] * [[magic]] * [[unionfs]] * [[nfs]] * [[symlink]] * [[firmlink]] +* [[fifo]] * ... 
@@ -124,7 +127,7 @@ The [[concept|concepts]] of translators creates its own problems, too: *These Translators are available in the [hurdextras repository](http://savannah.nongnu.org/cvs/?group=hurdextras) but not yet described on this website. They are in varying stages of Development.* * [jfs](http://www.nongnu.org/hurdextras/#jfs) -* [httpfs](http://www.nongnu.org/hurdextras/#httpfs) +* [[httpfs]] * [memfs](http://www.nongnu.org/hurdextras/#memfs) * [notice](http://www.nongnu.org/hurdextras/#notice) * [pith](http://www.nongnu.org/hurdextras/#pith) diff --git a/hurd/translator/eth-filter.mdwn b/hurd/translator/eth-filter.mdwn new file mode 100644 index 00000000..4f59b402 --- /dev/null +++ b/hurd/translator/eth-filter.mdwn @@ -0,0 +1,26 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +# IRC, freenode, #hurd, 2013-07-27 + +[[!tag open_issue_hurd]] + + ok, so as usual we actually *already* have a firewall + it's the eth-filter translator from zheng da + it has just never been really pushed forward... + good news :) + well, the bad news is that it probably doesn't support connection + tracking + since it's just bpf + using the libpcap syntax + well, a stateless fw should do for Debian/Hurds needs for now, + right? + yes + and it does work indeed diff --git a/hurd/translator/examples.mdwn b/hurd/translator/examples.mdwn index 867d4935..4947808e 100644 --- a/hurd/translator/examples.mdwn +++ b/hurd/translator/examples.mdwn @@ -16,7 +16,7 @@ or [hurd-extras](http://www.nongnu.org/hurdextras/). cvs -z3 -d:pserver:anonymous@cvs.savannah.nongnu.org:/sources/hurdextras co -* httpfs translator +* [[httpfs]] translator @@ -28,7 +28,7 @@ or $ cd tmp/ $ ls -l -* ftpfs translator +* [[ftpfs]] translator @@ -67,13 +67,13 @@ This is not as fast as `tar czvf newfile.tar.gz all my files`, but at least it's $ settrans -fgca /servers/socket/2 /hurd/pfinet -i -a -m -g -* Console translator -- setting up virtual consoles +* [[Console]] translator -- setting up virtual consoles $ console -d vga -d pc_mouse -d pc_kbd -d generic_speaker /dev/vcs -* iso9660fs translator -- 'mounting' your cdrom +* [[iso9660fs]] translator -- 'mounting' your cdrom diff --git a/hurd/translator/exec.mdwn b/hurd/translator/exec.mdwn index 54abba7e..1dc0ea26 100644 --- a/hurd/translator/exec.mdwn +++ b/hurd/translator/exec.mdwn @@ -1,4 +1,5 @@ -[[!meta copyright="Copyright © 2009, 2012 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2009, 2012, 2013 Free Software Foundation, +Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -11,4 +12,9 @@ License|/fdl]]."]]"""]] The *exec* server, listening on `/servers/exec`, is responsible for preparing the execution of processes. + +# Open Issues + + * [[open_issues/exec]]. + * [[open_issues/exec_memory_leaks]]. 
diff --git a/hurd/translator/ext2fs.mdwn b/hurd/translator/ext2fs.mdwn index 20faed5e..e2f6b044 100644 --- a/hurd/translator/ext2fs.mdwn +++ b/hurd/translator/ext2fs.mdwn @@ -179,6 +179,69 @@ small backend stores, like floppy devices. That would be a nice improvement, but only after writeback throttling is implemented. +## Stripped vs. Unstripped `ext2fs.static` + +[[!tag open_issue_hurd]] + + +### IRC, freenode, #hurd, 2013-09-17 + + I always had some trouble with dropping a rebuild ext2fs.static + into my test system and I never figured out why + I just followed a hunch and stripped the binary, and all of the + sudden it works + any ideas why? + teythoon: I quick search found me: + and + . + tschwinge: ugh, thanks for the pointers ;) + teythoon: They won't help too much I fear. Anyway, good + intuition (or whatever) ;-) that you found this out. + teythoon: Not exactly related to stripped/unstripped per se + (that is, debug information), but in the past we've had an issue about + relro (see binutils/glibc, ), + where a variable (that erroneously happend to be in such a read-only + section, if I remember correct) was tried to be modified -- which worked + "sometimes": depending on where exactly it was located in the binary + (which shifted around a page + boundary by stripped/unstripped), it'd segfault or not. Burnt + several days on that before Samuel (IIRC) eventually figured it out. + tschwinge: well, thanks anyway ;) + + +## Increased Memory Consumption + +### IRC, freenode, #hurd, 2013-09-18 + + ext2fs is using a ginormous amount of memory on darnassus since i + last updated the hurd package :/ + i wonder if my ext2fs large store patches rework have introduced a + regression + the order of magnitude here is around 1.5G virtual space :/ + it used to take up to 3 times less before that + looks like my patches didn't make it into the latest hurd package + teythoon: looks like there definitely is a new leak in ext2fs + :/ + memory only + the number of ports looks stable relative to file system usage + braunr: I tested my patches on my development machine, it's up + for 14 days (yay libvirt :) and never encountered problems like this + i've been building glibc to reach that state + hm, that's a heavy load indeed + could be the file name tracking stuff, I tried to make sure that + everything is freed, but I might have missed something + teythoon: simply running htop run shows a slight, regular increase + in physical memory usage in ext2fs + old procfs stikes again? :) + braunr: I see that as well... curious... + 16:46 < teythoon> could be the file name tracking stuff, I tried + to make sure that everything is freed, but I might have missed something + how knows, maybe completely unrelated + the tracking patch isn't that big, I've gone over it twice today + and it still seems reasonable to me + hm + + # Documentation * diff --git a/hurd/translator/fifo.mdwn b/hurd/translator/fifo.mdwn new file mode 100644 index 00000000..857922fc --- /dev/null +++ b/hurd/translator/fifo.mdwn @@ -0,0 +1,48 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. 
A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +The *fifo* translator implements named pipes (FIFOs). + + +# Open Issues + +## Not Terminating + +[[!tag open_issue_hurd]] + + +### IRC, OFTC, #debian-hurd, 2013-07-28 + + seems fifos started dying, as they should. am i wrong? + ( http://bugs.debian.org/629184 ) + so you're saying the bug should be closed? + best to comment on the bug then + i didn't hear anyone working on it, so i'm a bit surprised + could be due to lower-level fixes to glibc or so + and given often(:|) i'm wrong, i was asking + in two years there have been various changes in glibc and hurd + (for example the switch to pthreads) + yeah seems fixed. mknod'ing one then removing it, doesn't leave any + process around + cool + then please follow-up on the bug and/or close it + sure + the pleasure of closing it/them is yours + great job, whatever you did :) + + +### IRC, OFTC, #debian-hurd, 2013-07-29 + + * gg0 wonders if it can close savannah one as + wellhttps://savannah.gnu.org/bugs/?17128 + gg0: wdym? + gg0: got an example? + http://bugs.debian.org/629184 + i didn't close it myself diff --git a/hurd/translator/hostmux.mdwn b/hurd/translator/hostmux.mdwn index 5fab2dc5..ef16505b 100644 --- a/hurd/translator/hostmux.mdwn +++ b/hurd/translator/hostmux.mdwn @@ -29,3 +29,18 @@ When **/ftp** is accessed, the first directory is interpreted as ho You can see the new created translator in the process list: **ps ax | grep ftpsfs** . You shoud see **/hurd/ftpfs / ftp.yourhost.com** . -- [[Main/PatrickStrasser]] - 13 Jul 2004 + + +# Open Issues + +## IRC, freenode, #hurd, 2013-09-21 + +[[!tag open_issue_hurd]] + + ls /http://:/ + the image came with a global translator though I see it doesn't + grokk the alternate port notation. + oh right + I shall return to the fine documentation + it's a hostmux, it doesn't understand ports + damn, one thus can't url plain urls with that scheme diff --git a/hurd/translator/httpfs.mdwn b/hurd/translator/httpfs.mdwn new file mode 100644 index 00000000..dc4a62f7 --- /dev/null +++ b/hurd/translator/httpfs.mdwn @@ -0,0 +1,100 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +`httpfs` is a virtual filesystem allowing you to access web pages and files. + + +# Source + + + + +# Documentation + +## IRC, freenode, #hurd, 2013-09-03 + +[[!tag open_issue_documentation]] + + hi, why I can't cd to /http:/richtlijn.be/~larstiq/hurd/ to do + grep? + this is not ftp + it works for other file + ? + I can't cd to ~larstiq, I don't know why + http is not a filesystem protocol + while httpfs could try in representing it as such, it is not + something reliable + ok, it's not reliable + I expect it can expose dir like browser + so, the translator just know href from home page, and one by + one + uh? + if ...:80/a/b/c.png exits, but not has a href in homepage, so I + can't cd to a, right? 
+ you are looking things from the wrong point of view + a web server can do anything with URLs, including redirecting, + URL rewriting and whatever else + so, how to understand httpfs's idea? + how httpfs list dir? + check its code + en, no need it's not reliable + it's not work, it's enough + I have an idea, for the file system, we explore dir level by + level, but for http, we change full path one + once time + maybe can allow user to cd any directory, and just mark as some + special color to make user know the translator was not sure, file exist + or not + once the file exits, mark all the parent directory as normal + color? + congzhang: you can find more info about httpfs here: + http://nongnu.org/hurdextras/ + congzhang: you're still looking at http from the wrong point of + view + there are no directories nor files + you start a request for a URL, and you get some content back (in + case there's no error) + you mean httpfs just for kidding? + that the content is a web page listing a directory on the + filesystem of the web server machine, or a file sent back via the + connection, or a complex web page, it's the same + congzhang: you can only get a list of files if the web server + responds with an index of files + "files" + The readme explains how httpfs does its thing: + http://cvs.savannah.gnu.org/viewvc/*checkout*/httpfs/README?root=hurdextras + if I can't cd to /http:/host/a/b how to get + /http:/host/a/b/c.html, even the file exist? + you don't cd in http + cd is for changing directory, which does not apply to a protocol + like http which is not fs-oriented + yes, I agree with you, http was not fs-oriented + so httpfs was not so useful + You can access the document directly, though, can't you? + rekado: I try once more + I can't + so, the httpfs need some extend, http protocol was not fs + oriented, so need some extend to make it work with file system + http is not designed for file system usage, so extending it is + useless + or, httpfs was not so useful + there are many other protocols for file systems + I don't think so + i do + if we can't make it more useful, remove it from hurd rep, or + extend it more useful + add some more rule, to make it work with file system + no + some paradox in it + which paradox? + for http vs file system + ??? + tree oriented and star topology oriented? + you don't make any sense diff --git a/hurd/translator/nsmux.mdwn b/hurd/translator/nsmux.mdwn index d156772b..6b3be79c 100644 --- a/hurd/translator/nsmux.mdwn +++ b/hurd/translator/nsmux.mdwn @@ -1,12 +1,12 @@ -[[!meta copyright="Copyright © 2009 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2009, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license -is included in the section entitled -[[GNU Free Documentation License|/fdl]]."]]"""]] +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] # nsmux @@ -119,3 +119,24 @@ of the simplest use-case of namespace-based translator selection in the form of translator `nsmux`. The filter is partially implemented and this is the immediate goal. Propagating translators down directories is the next objective. 
+ + +## Open Issues + +### IRC, freenode, #hurd, 2013-08-22 + +[[!tag open_issue_hurd]] + + < youpi> err, is nsmux supposed to work at all? + < youpi> a mere ls doesn't work + < youpi> I'm running it as a user + < youpi> echo * does work though + < teythoon> ah, yes, nsmux,,is,,funny :p + < youpi> well, perhaps but I can't make it work + < youpi> well, the trivial ,,hello does work + < youpi> but ,,tarfs doesn't seem to be working for instance + < youpi> same for ,,mboxfs + < youpi> ,,xmlfs seems to somehow work a bit, but not very far... + < youpi> so it seems just nobody is caring about putting READMEs wherever + appropriate + < youpi> e.g. examples in socketio/ ... diff --git a/hurd/translator/pfinet.mdwn b/hurd/translator/pfinet.mdwn index f6f69ea4..bf535b21 100644 --- a/hurd/translator/pfinet.mdwn +++ b/hurd/translator/pfinet.mdwn @@ -1,5 +1,5 @@ -[[!meta copyright="Copyright © 2002, 2004, 2005, 2007, 2008, 2011 Free Software -Foundation, Inc."]] +[[!meta copyright="Copyright © 2002, 2004, 2005, 2007, 2008, 2011, 2013 Free +Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -33,6 +33,9 @@ installation. * [[DHCP]]. + * [[IPv6]]. + + * [[eth-filter]]: Firewall. + * [[Implementation]]. - * [[IPv6]]. diff --git a/hurd/translator/pfinet/implementation.mdwn b/hurd/translator/pfinet/implementation.mdwn index 9bcf62ef..2361615a 100644 --- a/hurd/translator/pfinet/implementation.mdwn +++ b/hurd/translator/pfinet/implementation.mdwn @@ -27,6 +27,170 @@ implementation. oh http://jl-icase.home.comcast.net/~jl-icase/LinuxTCP2.html +## IRC, freenode, #hurd, 2013-09-03 + +In context of the item on [[/contributing]]. + + About this task: "Make pfinet OK with the ethernet device going + away." --- how can I test this? How can I remove the ethernet device? + settrans on the ethernet device, handled by the netdde + translator + that is, make it go away (settrans -fg) + Ah, I see. + Thanks + check its status before with showtrans + then, after having made it go away, set it again + I don't think I'm doing this right... After `settrans -fg + /dev/eth0` I should not be able to access the network anymore, but it + still works. + How can I figure out which of the four network devices is actually + used? + rekado: the file system is used to open files, i.e. access + services + it's not used to revoke access + once pfinet has obtained a port to the network device, it keeps it + oh, yes, of course. Sorry, this is all very + new to me. + I'm not sure what the problem is that this task describes. In + what way is pfinet "not OK" with the ethernet device going away? + rekado: the idea is to make pfinet able to cope with a driver + crash + Can I trigger a driver crash for test purposes? (Or do I have to + build a purposefully broken driver first?) + use kill + Oh, good. + iirc, netdde doesn't restart correctly :x + you'll probably have to fix it a bit + i guess there is some persistent state that prevents it from + reinitializing correctly + okay + I may need one more pointer: where can I find the netdde code? + Grep'ing around I only see it only mentioned as an argument to + /hurd/devnode; also: should I work in some incubator branch or directly + in the hurd repo? + rekado: incubator branch + Okay. Thank you for your patience. I'll play with this in the + next few days. 
+ enjoy + :) + + +### IRC, freenode, #hurd, 2013-09-05 + + When I kill the /hurd/netdde process I can no longer access the + network (as expected); + To restore connectivity I run "settrans -g eth0 /hurd/devnode -M + /dev/netdde eth0" from the /dev directory. + When I access the network again everything is fine. (I do see a + message telling me "irq handler 11: release an dead delivery port" + ) + Is it the goal to avoid having to run settrans again to run netdde + after it crashes or is killed? + you don't need to run settrans again + that should get triggered automatically + Hmm, after killing netdde I get "Resource lost" when using wget. + It doesn't seem to be restarted automatically. + try again + the first wget makes pfinet try to use netdde and fail, thus crash + the second wil respawn pfinet + ideally pfinet shouldn't die, that's a TODO mentioned in the + "contributing page" + Ah, so that's what should be prevented. + it's just a matter of making pfinet be fine with errors from the + eth translator, and simply reopen it instead of dying + That's the thing I've been trying to figure out. + when I run wget a second (or third) time I get a different error; + "Name or service not known." + It's only okay again when I use settrans + maybe the devnode translator also needs some fixing + it's odd that I don't have the issue though + I'm using the qemu image, updated just yesterday. + same here + anyway, now you know where to put your hands :) + yes, thanks a lot. + + +### IRC, freenode, #hurd, 2013-09-07 + + in pfinet/ethernet.c:ethernet_open there's an assertion: + edev->ether_port == MACH_PORT_NULL + This is violated when netdde was killed and the device is + reopened. + I'm not sure what should be done: destroy the port before + reopening or drop the assertion? + If I drop the assertion, Mach seems to handle this just fine. + Says "irq handler 11: release an [sic] dead delivery port" and + then carries on without problems. + Is this a warning or an error, or can this be ignored? + (or none of the above?) + + +### IRC, freenode, #hurd, 2013-09-08 + + I have a simple patch for pfinet that lets it recover from an + error in ethernet_xmit when /hurd/netdde and /hurd/devnode have been + killed. + It doesn't work, though, when only netdde has been killed. + With devnode still around device_open fails with "(ipc/send) + invalid destination port" + I don't know where device_open is defined and why this error is + returned. + I guess the error refers to the "master_device" port returned by + file_name_lookup() in ethernet_open() + Why would file_name_lookup() return an invalid port when netdde is + dead but devnode is still running? + rekado: maybe because devnode needs to perform a fresh lookup as + well + + +### IRC, freenode, #hurd, 2013-09-09 + + braunr: re devnode: devnode only performs a single lookup in + parse_opt(), i.e. at start-up. + I'll try to understand devnode enough to patch it. + rekado: that's the problem + it should perform a lookup every time it's opened + +[[!message-id "1378730237-8091-1-git-send-email-rekado@elephly.net"]], +[[!message-id "1378731824-8928-1-git-send-email-rekado@elephly.net"]]. + + I submitted two patches to the mailing list. I've tested them on + Debian GNU/Hurd but based them on the incubator/dde branch. + rekado: awesome, reliability fixes are very much welcome + + +### IRC, freenode, #hurd, 2013-09-18 + + youpi: my apologies for the delay in getting back to you with + improvements to my pfinet/devnode patches. Been very busy. 
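+
+(For reference, the kill/observe/restore cycle from the sessions above as one
+hedged sequence; the URL is only a placeholder and the netdde PID has to be
+looked up by hand:)
+
+    kill <pid-of-netdde>                      # simulate a network driver crash
+    wget -O /dev/null http://ftp.debian.org/  # fails, pfinet dies ("Resource lost")
+    wget -O /dev/null http://ftp.debian.org/  # pfinet is respawned, may still fail
+    # Manual recovery, as quoted in the 2013-09-05 session:
+    cd /dev && settrans -g eth0 /hurd/devnode -M /dev/netdde eth0
+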
+ rekado: development pace on the hurd has always been slow, no need + to apologize + +## MAC Addresses + +[[!tag open_issue_hurd]] + + +### IRC, freenode, #hurd, 2013-09-21 + + what command will show me the MAC address of an interface? + ah, too bad inetutils-ifconfig doesn't seem to be showing it + I don't think we already have a tool for that + it would be a matter of patching inetutils-ifconfig + + +## Routing Tables + +[[!tag open_issue_hurd]] + + +### IRC, freenode, #hurd, 2013-09-21 + + Hmmm, OK I can work around that, what about routing tables, can I + see them? can I add routes besides the pfinet -g default route? + I don't think there is a tool for that yet + it's not plugged inside pfinet anyway + # Reimplementation, [[!GNU_Savannah_task 5469]] diff --git a/hurd/translator/pflocal.mdwn b/hurd/translator/pflocal.mdwn index dc2434dc..fdcc39f1 100644 --- a/hurd/translator/pflocal.mdwn +++ b/hurd/translator/pflocal.mdwn @@ -1,13 +1,35 @@ -[[!meta copyright="Copyright © 2000, 2008 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2000, 2008, 2013 Free Software Foundation, +Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license -is included in the section entitled -[[GNU Free Documentation License|/fdl]]."]]"""]] +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] The implementation of the `pflocal` server is in the `pflocal` directory, and uses [[`libpipe`|libpipe]] (shared code with the [[named_pipe|fifo]] implementation). + + +# Open Issues + +## `SO_REUSEADDR` + +### IRC, freenode, #hurd, 2013-09-19 + + Hi, is SO_REUSEADDR supported at all on Hurd? I can only find two + entries: + in libdde-linux26 and pfinet/linux-src, and the functionality + seems to be unimplemented. + gnu_srs: pfinet supports it + gnu_srs: grep talks about pfinet/linux-src/net/core/sock.c: + case SO_REUSEADDR: + two times + Yes, and that is the implementation? + I wrote a test for AF_INET and it works, but not for AF_UNIX + (maybe not so interesting case). + pflocal does not support it + Is that of interest at all? diff --git a/hurd/translator/proc.mdwn b/hurd/translator/proc.mdwn index 98940f87..d5e0960c 100644 --- a/hurd/translator/proc.mdwn +++ b/hurd/translator/proc.mdwn @@ -51,3 +51,25 @@ It is stated by `/hurd/init`. 
it could just exec sysvinit I just think it's easier to patch hurd than to remove the assumption that init is pid 1 from sysvinit + + +## IRC, freenode, #hurd, 2013-09-13 + + teythoon: also, as a feature request, i'd like the proc server not + to have pid 0, if you have any time to do that + so it appears in top and friends + braunr: noted, that should be easy + not using 0 is probably a good thing, many things use pid 0 as + something special + + +# Process Discovery + +## IRC, freenode, #hurd, 2013-08-26 + + < teythoon> somewhat related, I do not like the way the proc server just + creates processes for new mach tasks it discovers + < teythoon> that does not play well with subhurds for example + < braunr> teythoon: i agree with you on proc process-to-task mapping + < braunr> that's something i intend to completely rework on propel + < braunr> in a way similar to how pid namespaces work on linux diff --git a/hurd/translator/procfs/jkoenig/discussion.mdwn b/hurd/translator/procfs/jkoenig/discussion.mdwn index fcda453e..44b8cc77 100644 --- a/hurd/translator/procfs/jkoenig/discussion.mdwn +++ b/hurd/translator/procfs/jkoenig/discussion.mdwn @@ -150,6 +150,9 @@ License|/fdl]]."]]"""]] it "just" needs to be commited :) in either case, it can't hurt to bring this up again :-) +[[community/gsoc/project_ideas/mtab/discussion]], *IRC, freenode, #hurd, +2013-09-07*. + # root group @@ -305,6 +308,13 @@ License|/fdl]]."]]"""]] See also [[community/gsoc/project_ideas/mtab]]. +## IRC, freenode, #hurd, 2013-09-20 + + teythoon: should procfs now have $pid/mounts files pointing to + ../mounts? + pinotree: probably yes + + # `/proc/[PID]/auxv` Needed by glibc's `pldd` tool (commit diff --git a/hurd/translator/ufs.mdwn b/hurd/translator/ufs.mdwn new file mode 100644 index 00000000..4d611e95 --- /dev/null +++ b/hurd/translator/ufs.mdwn @@ -0,0 +1,38 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +The `ufs` translator supports some kind of the Unix File System. Beware, we're +not aware of anybody having used/tested it in ages, so maybe it is very broken +and will eat your data. + + +# IRC, freenode, #hurd, 2013-08-30 + +[[!tag open_issue_hurd]] + + There might be a copyright problem: well, there seems + BSD-4clauses in the code: + http://git.savannah.gnu.org/cgit/hurd/hurd.git/tree/ufs/alloc.c + braunr, tschwinge: Do you have any info on that? 4-clause BSD and + GPL on the same code are a license incompatibility… + Arne`: I've put it onto my (long) TODO list. + Easiest solution might be: rm -rf ufs. + will these affected code rewritten? or just modify license? + only the regents of the University of California could choose + to modify the license. + nalaginrut: one can't modify a licence if one is not the author + we can simply dump the code + s/author/owner/ + As I suppose ufs is unused/untested for a decade or so, I'd + have no issues with simply removing it from the tree, together with + ufs-fsck and ufs-utils. 
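+
+(For completeness, a hedged usage sketch: `ufs` is a libdiskfs-based
+translator, so the conventional invocation below is assumed rather than
+tested, and given the warning at the top of this page it should only ever be
+tried read-only on a throw-away disk or image:)
+
+    # Assumed invocation; /dev/hd2s1 is a placeholder for a UFS partition.
+    settrans -a /mnt /hurd/ufs --readonly /dev/hd2s1
+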
+ tschwinge: or maybe extract the ufs stuff in an own repo, to be + imported as branch in incubator or own hurd/ufs.git? + Sure, why not. diff --git a/libpthread.mdwn b/libpthread.mdwn index fc5c0974..0f7f28fe 100644 --- a/libpthread.mdwn +++ b/libpthread.mdwn @@ -61,7 +61,7 @@ even if the current number of threads is lower. The same issue exists in [[hurd/libthreads]]. The current implementation in libpthread is -[[buggy|libpthread/t/fix_have_kernel_resources]]. +[[buggy|open_issues/libpthread/t/fix_have_kernel_resources]]. # Open Issues diff --git a/microkernel/discussion.mdwn b/microkernel/discussion.mdwn index a5a73e18..f5626f6c 100644 --- a/microkernel/discussion.mdwn +++ b/microkernel/discussion.mdwn @@ -10,7 +10,7 @@ License|/fdl]]."]]"""]] [[!tag open_issue_documentation]] -IRC, freenode, #hurd, 2011-07-26: +# IRC, freenode, #hurd, 2011-07-26 < antrik> Tekk_`: regarding microkernels: the basic idea, and really the *only* fundamental difference, is that they isolate things in separate @@ -22,3 +22,41 @@ IRC, freenode, #hurd, 2011-07-26: these are secondary effects: such choices can also be implemented in a monolithic architecture -- and not necessarily harder. just less obvious in some cases... + + +# IRC, freenode, #hurd, 2013-08-28 + + ok question + what is the big advantage of microkernels over monolithic kernels + as you guys see it? + is it entirely for the benefit of developers or are there actaully + practical advantages? + Spyro: there are many advantages, at least in theory, in terms of + modularity, flexibility, stability, scalability, security, ... which are + for everyone + Spyro: of course some advantages are practical + for me, the main advantage is system extensibility + you can replace system services at runtime + and on the hurd, you can do it as an unprivileged user + (the direct side effect is far increased security) + kilobug: i don't see the scalability advantages though + braunr: I would say it goes in par with the modularity, like, you + can have a full-weight IPv4/IPv6 stack for desktop, but a minimal stack + for embeded + i see + for me, it's in par with extensibility :) + i see modularity only as an implementation of extensibility + or a special case of it + Spyro: basically, it's supposed to bring the same advantages as + fuse, but even more so (because it's not limited to file systems), and + better (because it's normally well integrated with the core of the + system) + also, fuse is kind of bolted on and Linux composes really badly + e.g. it is not possible to nfs export a fuse mounted filesystem + on Linux + bolted ? + isn't that the term? as in being attached using screws? + i'm not familiar with it :p + "a posteriori design" + yes + ok diff --git a/microkernel/l4.mdwn b/microkernel/l4.mdwn index de311497..ef39616b 100644 --- a/microkernel/l4.mdwn +++ b/microkernel/l4.mdwn @@ -1,5 +1,5 @@ -[[!meta copyright="Copyright © 2004, 2006, 2007, 2008, 2010, 2011, 2012 Free -Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2004, 2006, 2007, 2008, 2010, 2011, 2012, 2013 +Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -30,6 +30,14 @@ is now stalled. Genode and L4: http://www.youtube.com/user/drsartakov?feature=watch +# IRC, freenode, #hurd, 2013-08-26 + + < Spyro> also + < Spyro> what's the basic difference between mach and L4? 
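+
+(To make the extensibility point from the preceding section concrete, a small
+hedged example of an unprivileged user plugging a service into their own part
+of the system, without touching anything global:)
+
+    # Set the trivial "Hello, world!" translator on a private node ...
+    settrans -c ~/hello-node /hurd/hello
+    # ... and use it; the translator is started on demand, as this user.
+    cat ~/hello-node
+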
+ < braunr> l4 is a nanokernel whereas mach is a hybrid with high level + messaging and virtual memory services + + [[!ymlfront data=""" sel4: diff --git a/microkernel/mach/concepts.mdwn b/microkernel/mach/concepts.mdwn index 0f7cbf00..08bce3f5 100644 --- a/microkernel/mach/concepts.mdwn +++ b/microkernel/mach/concepts.mdwn @@ -31,3 +31,20 @@ text="*[[mach\_kernel\_principles|documentation]]*: In particular the [[!toggle id=mach_kernel_principles text="[mach\_kernel\_principles]"]] book further elaborates on Mach's concepts and principles. + + +# IRC, freenode, #hurd, 2013-08-26 + + < stargater> then is mach not more microkernel + < stargater> when it have driver inside + < braunr> mach is a hybrid + < braunr> even without drivers + < stargater> in www i read mach is microkernel + < stargater> not hybrid + < braunr> the word microkernel usually includes hybrids + < braunr> true microkernels are also called nanokernels + < braunr> the word isn't that important, what matters is that mach does + more in kernel than what the microkernel principle implies + < braunr> e.g. high level async IPC and high level virtual memory + operations + < braunr> including physical memory management diff --git a/microkernel/mach/deficiencies.mdwn b/microkernel/mach/deficiencies.mdwn index 03e4a8b0..8f47f61f 100644 --- a/microkernel/mach/deficiencies.mdwn +++ b/microkernel/mach/deficiencies.mdwn @@ -2318,3 +2318,69 @@ In context of [[open_issues/multithreading]] and later [[open_issues/select]]. about them even with that, it should be scalable enough for a start and improving those parts shouldn't be too difficult + + +## IRC, freenode, #hurd, 2013-07-10 + + braunr: From what I have understood you aim for x15 to be a + production ready μ-kernel for usage in the Hurd? Or is it unrelated to + the Hurd? + nlightnfotis: it's for a hurd clone + braunr: I see. Is it close to any of the existing + microkernels as far as its design is concerned (L4, Viengoos) or is it + new research? + it's close to mach + and qnx + + +## IRC, freenode, #hurd, 2013-07-29 + + making progress on x15 pmap module + factoring code for mapping creation/removal on current/kernel and + remote processes + also started "swap emulation" by reserving some physical memory to + act as swap backing store + which will allow creating memory pressure very early in the + development process + + +## IRC, freenode, #hurd, 2013-08-23 + + < nlightnfotis> braunr: something a little bit irrelevant: how many things + are missing from mach to be considered a solid base for the Hurd? Is it + only SMP and x86_64 support? + < braunr> define "solid base for the hurd" + < nlightnfotis> solid enough to not look for a replacement for it + < braunr> then i'd say, from my very personal point of view, that you want + x15 + < nlightnfotis> I didn't understand this. Are you planning for x15 to be a + better mach? + < braunr> with a different interface, so not compatible + < braunr> and thus, not mach + < nlightnfotis> is the source code for it available? Can I read it + somewhere? + < braunr> the implied answer being: no, mach isn't a solid base for the + hurd considering your definition + < braunr> http://git.sceen.net/rbraun/x15.git/ + < nlightnfotis> thanks. for that. So it's definite that mach won't stay for + long as the Hurd's base, right? 
+ < braunr> it will, for long + < braunr> my opinion is that it needs to be replaced + < nlightnfotis> is it possible that it (slowly) gets rearchitected into + what's being considered a second generation microkernel, or is it + hopeless? + < braunr> it would require a new interface + < braunr> you can consider x15 to be a modern mach, with that new interface + < braunr> from a high level view, it's very similar (it's a hybrid, with + both scheduling and virtual memory management in the kernel) + < braunr> ipc change a lot + + +## IRC, freenode, #hurd, 2013-09-23 + + for those of us interested in x15 and scalability in general: + http://darnassus.sceen.net/~rbraun/radixvm_scalable_address_spaces_for_multithreaded_applications.pdf + finally an implementation allowing memory mapping to occur + concurrently + (which is another contention issue when using mach-like ipc, which + often do need to allocate/release virtual memory) diff --git a/microkernel/mach/documentation.mdwn b/microkernel/mach/documentation.mdwn index cc880ab6..61e3469b 100644 --- a/microkernel/mach/documentation.mdwn +++ b/microkernel/mach/documentation.mdwn @@ -1,5 +1,5 @@ [[!meta copyright="Copyright © 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, -2010 Free Software Foundation, Inc."]] +2010, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -47,3 +47,14 @@ License|/fdl]]."]]"""]] - [An IO System for Mach](http://shakthimaan.com/downloads/hurd/An%20IO%20System%20for%20Mach.pdf) - [A Programmers' Guide to Mach System Call](http://shakthimaan.com/downloads/hurd/A.Programmers.Guide.to.the.Mach.System.Calls.pdf) + + +# IRC, freenode, #hurd, 2013-09-15 + + braunr: btw, are there multiple kernel threads in gnumach? + and is it safe to do a synchronous rpc call to a userspace + server? + teythoon: there are yes, but few + teythoon: the main (perhaps only) kernel thread is the page daemon + and no, it's not safe to do synchronous calls to userspace + except to the default pager diff --git a/microkernel/mach/gnumach/hardware_compatibility_list.mdwn b/microkernel/mach/gnumach/hardware_compatibility_list.mdwn index 587178e9..32e712c9 100644 --- a/microkernel/mach/gnumach/hardware_compatibility_list.mdwn +++ b/microkernel/mach/gnumach/hardware_compatibility_list.mdwn @@ -105,6 +105,11 @@ These boards are known to work. Gnumach/Hurd has been installed and run on these * VIA EPIA-M Mini-ITX motherboard with VIA Nehemiah C3 1Ghz processor. Onboard NIC (VIA Rhine) works good. * Compaq Deskpro ENS, Pentium3 (666 MHz upgraded to 1 GHz), Intel i815 chipset, chipset integrated NIC (detected twice, but works fine with eth0; trying to access eth1 confuses the driver and makes the system unusable), Matrox Mystique 220 (PCI) graphics card. Also works with rtl8029 (NE2000 PCI) NIC when onboard NIC disabled in BIOS setup. * Abit BX6 Rev. 2.0 with Celeron 400, after disabling "memory hole at 15MB" option in BIOS setup. (Otherwise, Mach detects only 15MiB of RAM, making Hurd run *extremely* slow and instable.) Should also work with PentiumII or Pentium3. +* IRC, freenode, #hurd, 2013-08-26: + + < stargater> have anyone gnu/hurd running on real hw ? 
+ < youpi> my latitude e6420 laptop, for instance + # User Failure Reports diff --git a/microkernel/mach/message/msgh_id.mdwn b/microkernel/mach/message/msgh_id.mdwn index ea52904a..799ed5cc 100644 --- a/microkernel/mach/message/msgh_id.mdwn +++ b/microkernel/mach/message/msgh_id.mdwn @@ -13,6 +13,8 @@ License|/fdl]]."]]"""]] Every [[message]] has an ID field, which is defined in the [[RPC]] `*.defs` files. +[[!toc]] + # IRC, freenode, #hurd, 2012-07-12 @@ -281,3 +283,25 @@ files. then submit to the list for review hm ok youpi: ok, next time, i'll commit such changes directly + + +# Subsystems + +## IRC, freenode, #hurd, 2013-09-03 + + anything I need to be aware of if I want to add a new subsystem? + is there a convention for choosing the subsystem id? + a subsystem takes 200 IDs + grep other subsystems in mach and the hurd to avoid collisions of + course + yes + i know that ;) + :) + i've noticed the _notify subsystems being x+500, should I follow + that? + 100 for rpc + 100 for their replies? + teythoon: yes + pinotree: yes + ok + we should really work on mig... + ... :) diff --git a/microkernel/mach/mig.mdwn b/microkernel/mach/mig.mdwn index d6340574..f8046cb2 100644 --- a/microkernel/mach/mig.mdwn +++ b/microkernel/mach/mig.mdwn @@ -24,7 +24,8 @@ them to the client program. Similar actions are provided in the skeletons that are linked to server programs. MIG allows very precise semantics to be specified about what the arguments are and how to be passed. -It has its problems with [[structured_data]], however. +It has its problems with +[[structured_data|open_issues/mig_portable_rpc_declarations]], however. * [[Documentation]] diff --git a/microkernel/mach/mig/documentation.mdwn b/microkernel/mach/mig/documentation.mdwn index 7d4f1eca..e6bd1bb9 100644 --- a/microkernel/mach/mig/documentation.mdwn +++ b/microkernel/mach/mig/documentation.mdwn @@ -1,5 +1,5 @@ -[[!meta copyright="Copyright © 2002, 2003, 2005, 2007, 2008, 2009, 2010 Free -Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2002, 2003, 2005, 2007, 2008, 2009, 2010, 2013 +Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -82,3 +82,20 @@ pp. 67--77." * [[ServerCopy]] * MIG *in action*: [[hurd/io_path]]. + + +## IRC, freenode, #hurd, 2013-09-04 + +[[!tag open_issue_documentation open_issue_mig]] + + btw, I just realized that mig mashes two very different things + together, namely the serialization/parsing and the message + sending/receiving + yes + I'd prefer it if that were separated + me too + that's why i want x15 to have a bare messaging interface .. :) + \o/ + simple (but optimized) scatter-gather + it makes sense for mig since mach messages do include + serialization metadata such as types diff --git a/microkernel/mach/mig/structured_data.mdwn b/microkernel/mach/mig/structured_data.mdwn deleted file mode 100644 index 1c8abe08..00000000 --- a/microkernel/mach/mig/structured_data.mdwn +++ /dev/null @@ -1,119 +0,0 @@ -[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] - -[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable -id="license" text="Permission is granted to copy, distribute and/or modify this -document under the terms of the GNU Free Documentation License, Version 1.2 or -any later version published by the Free Software Foundation; with no Invariant -Sections, no Front-Cover Texts, and no Back-Cover Texts. 
A copy of the license -is included in the section entitled [[GNU Free Documentation -License|/fdl]]."]]"""]] - -[[!tag open_issue_mig]] - -# IRC, freenode, #hurd, 2013-06-25 - - is there a nice way to get structured data through mig that I - haven't found yet? - say an array of string triples - no - :/ - but you shouldn't need that - my use case is getting info about fs translators from init to - procfs - -[[community/gsoc/project_ideas/mtab]]. - - should I go for an iterator like interface instead? - depends - how many do you need ? - you could go for a variable sized array too - have a look at what already exists - records, maybe 10-15, depends on many fs translators are running - a variable sized array is ok if the size isn't too big (and when i - say too big, i mean hundreds of MiB) - an iterator is ok too if there aren't too many items - you may want to combine both (i think that's what proc does) - be aware that the maximum size of a message is limited to 512 MiB - yeah I saw the array[] of stuff stuff, but array[] of string_t - does not work, I guess b/c string_t is also an array - how would I send an array of variable length strings? - i'm not sure you can - or maybe out of line - somehow I expected mig to serialize arbitrary data structures, - maybe it's to old for that? - yeah, I read about uot of line, but that seems overkill - it is old yes - and not very user friendly in the end - let me check - we could stuff json into mig... - see proc_getallpids for example - we could get rid of low level serialization altogether :p - hah, exactly what I was looking at - (which is what i'll do in x15) - type pidarray_t = array[] of pid_t; - but that is trivial b/c its array[] of pid_t - and always have the server writing guide near you - yes - well, make one big string and an array of lengths :p - thought about that and said to myself, there must be a better - way that I haven't found yet - or one big string filled with real null-terminated c strings that - you keep parsing until you ate all input bytes - i'm almost certain there isn't - type string_t = c_string[1024]; /* XXX */ - yes - even that isn't really variable sized - you think anyone would object to me putting a json encoder in - /hurd/init? it is probably better than me at serializing stuff... - try with mig anyway - the less dependencies we have for core stuff, the simpler it is - but i agree, mig is painful - would it be too hacky if I abused the argz functions? they do - exactly what I'd need - - -## IRC, freenode, #hurd, 2013-06-26 - - there is https://code.google.com/p/protobuf-c/ and it has a rpc - mechanism and I believe one could plug arbitrary transports easily - please don't think about it - we really don't want to add another layer of serialization - it's better to completely redesign mach ipc anyway - and there is a project for that :p - ive seen x15 - just food for thought - i've studied google protocol buffers - and fyi, no, it wouldn't be easy to plug arbitrary transports on - top of mach - there is a lot of knowledge about mach ports in mig - -[[community/gsoc/project_ideas/mtab]]. - - but again I face the challenge of serializing a arbitrary sized - list of arbitrary sized strings - yes - list of ports is easier ;) but I think its worthwile - so what about abusing argz* for this? you think it's too bad a - hack? 
- no since it's in glibc - awesome :) - but i don't remember the details well and i'm not sure the way you - use it is safe - yeah, I might have got the details wrong, I hadn't had the - chance to test it ;) - - about this dynamic size problem - a "simple" varying size array should do - you can easily put all your strings in there - seperated by 0? - yes - that's exactly what the argz stuff does - you'll get the size of the array anyway, and consume it until - there is no byte left - good - but be careful with this too - since translators can be run by users, they somtimes can't be - trusted - and even a translator running as root may behave badly - so careful with parsing - noted diff --git a/open_issues/anatomy_of_a_hurd_system.mdwn b/open_issues/anatomy_of_a_hurd_system.mdwn index 75a62535..ba72b00f 100644 --- a/open_issues/anatomy_of_a_hurd_system.mdwn +++ b/open_issues/anatomy_of_a_hurd_system.mdwn @@ -660,3 +660,146 @@ Actually, the Hurd has never used an M:N model. Both libthreads (cthreads) and l but as youpi said, it still requires work and nobody's working on it you may want to check l4 fiasco.oc though + + +# System Personality + +## IRC, freenode, #hurd, 2013-07-29 + + over the past few days I gained a new understanding of the Hurd + teythoon: really ? :) + teythoon: That it's a complex and distributed system? ;-) + And at the same time a really simple one? + ;-D + it's just a bunch of mach programs and some do communicate and + behave in a way a posix system would, but that is more a convention than + anything else + tschwinge: yes, kind of simple and complex :) + the right terminology is "system personality" + 11:03 < teythoon> over the past few days I gained a new + understanding of the Hurd + teythoon: still no answer on that :) + braunr: ah, I spent lot's of time with the core servers and + early bootstrapping and now I gained the feeling that I've seen the Hurd + for what it really is for the first time + + +# RPC Interfaces + +## IRC, freenode, #hurd, 2013-09-03 + + I'm a little confused by the hurd and incubator git repos. + DDE is only found in the dde branch in incubator, but not in the + hurd repo. + Does this mean that DDE is not ready for master yet? + yes + If DDE is not yet used in the hurd (except in the dde branch in + the incubator repo), does pfinet use some custom glue code to use the + Linux drivers? + this has nothing to do with pfinet + pfinet is the networking stack, netdde are the networking drivers + the interface between them doesn't change, whether drivers are in + kernel or not + I see + + +# IRC, freenode, #hurd, 2013-09-20 + + HI there, I have no previous knowledge about OS's. I'm trying to + undestand the structure of the Hurd and the comparison between, say, + Linux way of managing stuff ... + for instance, I read: "Unlike other popular kernel software, the + Hurd has an object-oriented structure that allows it to evolve without + compromising its design." + that means that while for adding feature to the Linux-kernel you + have to add some stuff `inside` a procedure, whilst in the Hurd kernel + you can just, in principle at least, add an object and making the kernel + using it?... + Am I making stuff too simple? 
+ Thanks + not exactly + unix historically has a "file-oriented" structure + the hurd allows servers to implement whatever type they want, + through the ability to create custom interfaces + custom interfaces means custom calls, custom semantics, custom + methods on objects + you're not restricted to the set of file interfaces (open, seek, + read, write, select, close, etc..) that unix normally provides + braunr: uhm ...some example? + see processes for example + see + http://darnassus.sceen.net/gitweb/savannah_mirror/hurd.git/tree/HEAD:/hurd + this is the collection of interfaces the hurd provides + most of them map to unix calls, because gnu aims at posix + compatibility too + some are internal, like processes + or authentication + but most importantly, you're not restricted to that, you can add + your own interfaces + on a unix, you'd need new system calls + or worse, extending through the catch-all ioctl call + braunr: mhn ...sorry, not getting that. + what part ? + ioctl has become such a mess :s + braunr: when you say that Unix is `file-oriented` you're + referring to the fact that sending/receiving data to/from the kernel is + designed like sending/receiving data to/from a file ...? + not merely sending/receiving + note how formatted your way of thinking is + you directly think in terms of sending/receiving (i.e. read and + write) + braunr: (yes) + that's why unix is file oriented, access to objects is done that + way + on the hurd, the file interface is one interface + there is nothing preventing you from implementing services with a + different interface + as a real world example, people interested in low latency + profesionnal audio usually dislike send/recv + see + http://lac.linuxaudio.org/2003/zkm/slides/paul_davis-jack/unix.html for + example + braunr: how big and messy ioctl has become is a good proof that + the Unix way, while powerful, does have its limits + giuscri: keep in mind the main goal of the hurd is extensibility + without special privileges + braunr: privileges? + root + braunr: what's wrong with privileges? + they allow malicious/buggy stuff to happne + and have dramatic effects + braunr: you're obviously *not* referring to the fact that once + one have the root privileges could change some critical-data + ? + i'm referring to why privilege separation exists in the first + place + if you have unprivileged users, that's because you don't want them + to mess things up + on unix, extending the system requires privileges, giving those + who do it the ability to destroy everything + braunr: yes, I think the same + the hurd is designed to allow unprivileged users to extend their + part of the system, and to some extent share that with other users + although work still remains to completely achieve that + braunr: mhn ...that's the `server`-layer between the + single-application and kernel ? + the multi-server based approach not only allows that, but + mitigates damage even when privileged servers misbehave + one aspect of it yes + but as i was just saying, even root servers can't mess things too + much + for example, our old (sometimes buggy) networking stack can be + restarted when it behaves wrong + the only side effect being some applications (ssh and exim come to + mind) which need to be restarted too because they don't expect the + network stack to be restarted + braunr: ...instead? + ? 
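+
+(A hedged sketch of the "restart the networking stack" case mentioned above;
+the fsysopts/settrans pattern is the same one quoted for pfinet in the
+crash-server notes:)
+
+    # Show the command line pfinet is currently running with ...
+    fsysopts /servers/socket/2
+    # ... and restart it in place with the same arguments.
+    settrans -afg /servers/socket/2 $(fsysopts /servers/socket/2)
+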
+ giuscri: on Linux, if the network stack crash/freezes, you don't + have any other option than rebooting the system - usually with a nice + "kernel pani" + giuscri: and you may even get filesystem corruption "for free" in + the bundle + and hoping it didn't corrupt something important like file system + caches before being flushed + kilobug, braunr : mhn, ook diff --git a/open_issues/arm_port.mdwn b/open_issues/arm_port.mdwn index b07df939..ebbad1a4 100644 --- a/open_issues/arm_port.mdwn +++ b/open_issues/arm_port.mdwn @@ -273,3 +273,56 @@ architecture. braunr: OK, thanks. I'm interested on it, and didn't want to duplicate efforts. little addition: it may have started, but we don't know about it + + +# IRC, freenode, #hurd, 2013-09-18 + + as i understand ; on startup, vm_resident.c functions configure + the whole available memory ; but at this point the system does not split + space for kernel and space for future apps + when pages are tagged to be used by userspace ? + Hooligan0: at page fault time + the split is completely virtual, vm_resident deals with physical + memory only + braunr: do you think it's possible to change (at least) + pmap_steal_memory to mark somes pages as kernel-reserved ? + why do you want to reserve memory ? + and which memory ? + braunr: first because on my mmu i have two entry points ; so i + want to set kernel pages into a dedicated space that never change on + context switch (for best cache performance) + braunr: and second, because i want to use larger pages into + kernel (1MB) to reduce mmu work + vm_resident isn't well suited for large pages :( + i don't see the effect of context switch on kernel pages + at many times, context switch flush caches + ah you want something like global pages on x86 ? + yes, something like + how is it done on arm ? + virtual memory is split into two parts depending on msb bits + for example 3G/1G + MMU will use two pages tables depending on vaddr (hi-side or + low-side) + hi is kernel, low is user ? + so, for the moment i've put mach at 0xC0000000 -> 0xFFFFFFFF ; + and want to use 0x00000000 -> 0xBFFFFFFF for userspace + yes + ok, that's what is done for x86 too + 1MB pages for kernel ; and 4kB (or 64kB) pages for apps + i suggest you give up the large page stuff + well, you can use them for the direct physical mapping, but for + kernel objects, it's a waste + or you can rewrite vm_resident to use something like a buddy + allocator but it's additional work + for the moment it's waste ; but with some littles changes this + allow only one level of allocation mapping ; -i think- it's better for + performances + Hooligan0: it is, but not worth it + will you allow changes into vm_resident if i update i386 too ? + Hooligan0: sure, as long as these are relevant and don't introduce + regressions + ok + Hooligan0: i suggest you look at x15, since you may want to use it + as a template for your own changes + as it was done for the slab allocator for example + e.g. 
x15 already uses a buddy allocator for physical memory diff --git a/open_issues/boehm_gc.mdwn b/open_issues/boehm_gc.mdwn index 7f860bba..623dcb83 100644 --- a/open_issues/boehm_gc.mdwn +++ b/open_issues/boehm_gc.mdwn @@ -1,4 +1,5 @@ -[[!meta copyright="Copyright © 2010, 2012 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2012, 2013 Free Software Foundation, +Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -433,3 +434,92 @@ restults of GNU/Linux and GNU/Hurd look very similar. pinotree: is it a Debian-specific change, or included upstream? libgc using SIGUSR1/2? upstream ok + + +### IRC, freenode, #hurd, 2013-09-03 + + braunr: when will libc malloc say memory corruption? + congzhang: usually on free + sometimes on alloc + and after one thread be created + I want to know why and how to find the source + does libgc work well on hurd? + i don't think it does + so , why it can't? + congzhang: what ? + libgc was not work on hurd + why? + I try porting dotgnu + ah + nested signal handling + one program always receive Abort signal + and why it should be a problem in libgc? + for malloc memory corruption + libgc relies on this + yes + so, is there a workaround to make it work? + show the error please + http://paste.debian.net/34416/ + where's libgc? + i compile dotgnu with enable-gc + so? + I am not sure about it + so why did you say earlier that libgc doesn't work? + because after I see one thread was created notice by gdb, it + memory corruption + so what? + maybe gabage collection happen, and gc thread start + that's speculation + you cannot debug things speculating on code you don't know + less speculation and more in-deep debugging, please + * congzhang I try again, to check weather thread list changing + sorry for this + it simply looks like a real memory corruption (an overflow) + maybe PATH related problem + PATH? + yes + PATH_MAX + but unlikely + csant do path traverse + I fond the macro + found + #if defined(__sun__) || defined(__BEOS__) + #define BROKEN_DIRENT 1 + #endif + and so for hurd? + BROKEN_DIRENT doesn't say much about what it does + nope + whoops + it seems other port meet the trouble too + which trouble? + http://comments.gmane.org/gmane.comp.gnu.dotgnu.developer/3642 + (gdb) ptype struct dirent + type = struct dirent { + __ino_t d_ino; + unsigned short d_reclen; + unsigned char d_type; + unsigned char d_namlen; + char d_name[1]; + } + + d_name should be char[PATH_MAX]? + and + http://libjit-linear-scan-register-allocator.googlecode.com/svn/trunk/pnet/support/dir.c + no + stop pasting that much + <_d3f> uhm PATH_MAX on the hurd? + and stop saying nonsense + sorry, i think four line was not worth to pastbin + they are 8 + never again + just try by defining BROKEN_DIRENT to 1 in all cases and see how + it goes + * congzhang read dir.c again + braunr: it does not crash this time, I do more test + + +#### IRC, freenode, #hurd, 2013-09-04 + + hi, I am dotgnu work on hurd, and even winforms app + s/am/make + and maybe c# hello world translate another day :) diff --git a/open_issues/clock_gettime.mdwn b/open_issues/clock_gettime.mdwn index 98454d45..65ab52df 100644 --- a/open_issues/clock_gettime.mdwn +++ b/open_issues/clock_gettime.mdwn @@ -197,4 +197,14 @@ In context of [[select]]. "atomic" update of the struct with time :) +# IRC, freenode, #hurd, 2013-09-04 + + do we have CLOCK_MONOTONIC ? 
+ teythoon: i think we do but it's actually a simple offset from + CLOCK_REALTIME .. :) + ah never mind, I do hate this posix time interface anyways + really ? + i think librt is decent + + # Candidate for [[vDSO]] code? diff --git a/open_issues/cloud.mdwn b/open_issues/cloud.mdwn new file mode 100644 index 00000000..58ed2f5b --- /dev/null +++ b/open_issues/cloud.mdwn @@ -0,0 +1,49 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +Some *cloud*y things. + +[[!toc]] + + +# [[!wikipedia OpenStack]] + +## IRC, freenode, #hurd, 2013-09-21 + + Hmmm, was hoping to run hurd on my kvm based openstack cloud, but + no virtio. + I see "Write virtio drivers for KVM. Ideally they would be + userland" is listed as a "small hack", as a sysadmin rather than an OS + hacker it doesn't sound small to me, but if there's some standard + documentation on porting drivers I could take a run at it. + well, perhaps "small" is not the proper word + compared to e.g. revamping disk i/o :) + it's not something one can achieve in e.g. 1h, for instance + it's not something straightforward either, one has to get + documentation about virtio (I don't know what exists), and get + documentation about the mach device interface (that's in the gnumach + manual, the devnode translator can be used as a skeleton) + jproulx: openstack imposes the use of virtio drivers? that's odd + that's more like I'd expect. I there's enough search terms in + your response for me to see what's really involved + youpi it doesn't impose that but it is how mine is configured the + other thousand VMs are happier that way. + I can look at that side too and see if I need to have everything + use the same device settings or if I can control it per instance + A bit of a non-sequitur at this point but just in case someone + searches the transcripts and sees my questions about hurd on openstack, + yes it is possible to specify non-virtio devices per image, here's the + commandline to load sthibault's qemu image into openstack with devices + that work: + glance image-create --property hw_disk_bus=ide --property + hw_cdrom_bus=ide --property hw_vif_model=rtl8139 --disk-format raw + --container-format bare --name gnu-hurd --copy-from + http://people.debian.org/~sthibault/hurd-i386/debian-hurd.img + jproulx: thanks, I've pushed it on the wiki diff --git a/open_issues/crash_server.mdwn b/open_issues/crash_server.mdwn index 7ed4afbf..5182df6f 100644 --- a/open_issues/crash_server.mdwn +++ b/open_issues/crash_server.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2009, 2010, 2011 Free Software Foundation, +[[!meta copyright="Copyright © 2009, 2010, 2011, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable @@ -189,6 +189,65 @@ one... mach_msg_trap /home/tschwinge/tmp/gnumach/gnumach-1-branch-Xen-branch.build/../gnumach-1-branch-Xen-branch/ipc/mach_msg.c:1367 + +# IRC, freenode, #hurd, 2013-09-07 + + I'm trying to investigate a crash in pfinet, so it will actually + die. 
I just want to know why it dies and what the value of a few + variables has been when it died. + have you tried to make it dump core? + oh, good idea. + I'll try that. + do you know how? + I don't, but I think I can figure it out. + look into /servers + do I just have to set CRASHSERVER=/servers/crash-dump-core and run + pfinet in that environment? + possibly, I've never heard of CRASHSERVER, but it's certainly + plausible ;) + I just link crash to crash-dump-core, that way it is permanent + and for all processes + found it in the website contents + gotta try that. + hmm, I can't get pfinet to dump core; linked /servers/crash to + /servers/crash-dump-core and compiled pfinet to raise(6) at one point. + But no core file is created. + :/ + rekado: try cd /tmp ; cat & kill -SIGILL %% to see if that dumps + core + yes, this works. + I replaced the original pfinet with my crashing version. + Should it dump core to /hurd then? + I'm not sure about it's wd + hm, ok, I just did settrans -ca foo /hurd/pfinet and then killed + that pfient with SIGILL and it dumped core + to the directory I issued the settrans from + So I must run it myself. I can't just replace the original binary + and have it dump core somewhere. + it seems that you have to use settrans -ca to start an active + translator + do fsysopts /servers/socket/2 to find out the cmdline of your + pfinet + that's very helpful. + thanks + then use this to restart it, e.g.: + settrans -afg /servers/socket/2 $(fsysopts /servers/socket/2) + if it dies it should dump core to you cwd + great. Thank you very much. I had been wondering how to get the + full cmdline of pfinet. + * rekado makes a note of fsysopts + yup, there's the core file. Nice. + cool 8D + btw, in case using gdb doesn't work out for your problem, if you + start pfinet (or any translator) this way (with -a == active), you can + write stuff to stderr + yeah, I noticed that. The assert() call wrote to stderr. Useful. + rekado: core dumps are another not-working-well feature of the + hurd :/ + i recommend attaching + rekado: In case that's still helpful: + . + --- If someone is working in this area, they may want to have a look at diff --git a/open_issues/dbus.mdwn b/open_issues/dbus.mdwn index 2f02579e..a41515a1 100644 --- a/open_issues/dbus.mdwn +++ b/open_issues/dbus.mdwn @@ -1,4 +1,5 @@ -[[!meta copyright="Copyright © 2011, 2012 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2012, 2013 Free Software Foundation, +Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -14,15 +15,17 @@ The dbus problems are due to missing scm credentials [[sendmsg_scm_creds]] and s [[pflocal_socket_credentials_for_local_sockets]]. There was also a problem with short timeout in [[select]], but that has been solved in Debian by setting a minimum timeout of 1ms. ---- +[[!toc]] -IRC, freenode, #hurd, 2011-11-26: + +# IRC, freenode, #hurd, 2011-11-26 BTW, how much effort is necessary to fix dbus? basically, have pflocal know who's the sender (pid/uid/gid/groups) in the socket send op -IRC, freenode, #hurd, 2011-12-16: + +# IRC, freenode, #hurd, 2011-12-16 pinotree: what's the problem with dbus ? 
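+
+(Stepping back to the crash-server session above, the core-dump recipe spread
+over that conversation, collected into one hedged sequence; it assumes
+/servers/crash is the usual symlink:)
+
+    # Have the crash server dump core for all processes.
+    ln -sf crash-dump-core /servers/crash
+    # Quick check that core dumping works at all:
+    cd /tmp
+    cat & kill -SIGILL %%
+    # Restart pfinet actively with its current arguments, so that a core
+    # file lands in the current working directory if it dies.
+    settrans -afg /servers/socket/2 $(fsysopts /servers/socket/2)
+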
braunr: select() returning 0 changed fd's with very short (eg < @@ -53,7 +56,8 @@ IRC, freenode, #hurd, 2011-12-16: hm i agree with neal, i don't understand why the timeout is given to the kernel as part of the mach_msg call -IRC, freenode, #hurd, 2011-12-20: + +# IRC, freenode, #hurd, 2011-12-20 hm, i don't see any occurrence of SCM_CREDENTIALS in dbus only SCM_RIGHTS @@ -88,3 +92,164 @@ IRC, freenode, #hurd, 2011-12-20: iirc roland didn't like one or more parts of it (but i could be wrong) ok + + +# IRC, freenode, #hurd, 2013-07-17 + + btw pinotree, what happened to your efforts to make dbus work? + not much, my initial patch was just a crude hack, a better + solution requires more thinkering and work + yes, ive seen that + but that was only a tiny patch against the libc, surely there + must be more to that? + not really + and the proper fix is to patch pflocal to query the auth server + and add the credentials? + possibly + that doesn't sound to bad, did you give it a try? + not really, got caught in other stuff + + +# IRC, freenode, #hurd, 2013-09-02 + + something is wrong with libc0.3 since the switch to 2.17. dbus + does not run any longer when rebuilt + the latest build of dbus was with 2.13: libc0.3-dev: already + installed (2.13-38) + debug it + Yes, I will. Maybe somebody could rebuild it and verify my + findings? + gnu_srs1: your finding is "doesn't work", which is generic and + does not help without investigation + just rebuild it and: e.g. ./build-debug/bus/dbus-daemon --system + (--nofork) + gnu_srs1: please, debug it + I have partially already. But maybe the problems only shows on + my box. I'll rebuild on another box before continuing debugging. + gnu_srs1: are you, by chance, running a libc or something else + with your scm_creds work? + I did, but I've backed to 2.17-92 right now. + sane problem with dbus on another box, something's fishy:-( + braunr: any good way to find out if the dbus problems are with + libpthread? Setting a breakpoint with libc0.3-dbg installed. + gnu_srs1: i don't know + +See [[glibc]], *Missing interfaces, amongst many more*, *`SOCK_CLOEXEC`*. + + +# IRC, freenode, #hurd, 2013-09-04 + + Hi, looks like dbus requires abstract socket namespace: #undef + HAVE_ABSTRACT_SOCKETS What's missing? + uh? + abstract unix sockets are a Linux feature, and surely it is not + mandatory for dbus + Looks like dbus exits if they are not supported: + dbus_set_error (error, DBUS_ERROR_NOT_SUPPORTED, "Operating + system does not support abstract socket namespace\n");   _dbus_close + (listen_fd, NULL); 1061  return -1; + that is enclosed in a if (abstract) + and that parameter is set to true in other places (eg + dbus/dbus-server-unix.c) only when HAVE_ABSTRACT_SOCKETS is defined + so no, abstract sockets are not mandatory + Well this code is executed e.g. when running emacs remotely in + X. Have to dig deeper then to see why. + maybe it could have to do the fact that your dbus server is + running in linux and runs by default using such sockets type + but yes, you need to dig better + pinotree: You are right. when running natively the problem is: + *drums* + Manually: Process /usr/lib/at-spi2-core/at-spi-bus-launcher + exited with status 1 + eh? + Error retrieving accessibility bus address: + org.freedesktop.DBus.Error.Spawn.ChildExited: ^ + most probably that service does not start due to the lack of + socket credentials which affects dbus + uninstall or disable those additional services, they are not + your problem + credentials is enabled. which services to remove? 
+ dunno + + +# IRC, freenode, #hurd, 2013-09-11 + + Hi, looks like frebsd had (2008) the same problem as hurd when + sending credentials over PF_INET: + + http://lists.freebsd.org/pipermail/freebsd-hackers/2008-May/024577.html + Since the dbus code is about the same now (2013), maybe they + added support? + The next message in the thread confirms that the dbus code is + invalid, does anybody have pointers? + from what i've seen so far, socket credentials are done only for + local sockets (ie PF_UNIX) + i don't see how things like uid/gid/pid of the socket endpoint + can have anything to do with AF_INET + and socket credentials in dbus are used only in the [local] + socket transport, so there's no issue + + +# IRC, freenode, #hurd, 2013-09-12 + + pinotree: Yes, there is an issue with dbus and AF_INET, see + test/corrupt.c: tests /corrupt/tcp and /corrupt/byte-order/tcp:-/ + gnu_srs: what's wrong with those? they are just testing the + connection over a tcp socket + as said above, socket credentials shouldn't be used in such + cases + They are, see also test/relay.c: /relay and /limit tests:-( + how are they? + please be more specifc... + Just run the tests yourself with DBUS_VERBOSE=1 + you are claiming there is a problem, so please specify what is + the actual issue + DBUS_VERBOSE=1 build-debug/test/test-relay + you are claiming there is a problem, so please specify what is + the actual issue + same with test-corrupt + look at the verbose output: Failed to write credentials: Failed + to write credentials byte: Invalid argument + coming from pfinet since PF_INET is used. + check what it does on linux then + put an abort() at the start of the read/write socket credential + functions in dbus-sysdeps-unix.c and see whether it is triggered also on + linux + SO_PEERCRED is used for linux and LOCAL_CREDS is used for + kfreebsd, so we are on our own here:-/ + and linux' SO_PEERCRED works also on AF_INET sockets? i'd doubt + it + + http://stackoverflow.com/questions/10037086/so-peercred-vs-scm-credentials-why-there-are-both-of-them + yes, i know the difference, but please read what i asked again + I'll check to be sure... + gnu_srs: user credentials are not supposed to be passed through an + AF_INET socket + how hard is that to understand ? + OK, linux use send since CMSGCREDS is not defined to write + credentials. Working on how they are received. + braunr: I do understand, but the dbus code tries to do that for + Hurd:-( + then it should do that on linux too + (since the local socket credentials code is isolated in own + functions, and they are called only for the unix transport) + Happiness:-D, almost all dbus tests pass! + 17(17) dbus tests pass:) + gnu_srs: hopefully your patch does things right + which patch + adding credentials through unix socket + isn't that what you're doing ? + the mail to MLs is from the stock installed packages. + ? + the test reports are with the SCM_CREDS patches, but I stumbled + on the SCM_RIGHTS issues reported to MLs + no patches applied, just test the attached file yourself. + so what's your work about ? + I'm working on SCM_CREDS, yes, and created patches for dbus, + glib2.0 and libc. + the mail was about some bug in the call to io_restrict_auth in + sendmsg.c: without any of my patches applied (another image) + gnu_srs: you have to give us more context, how are we supposed + to know how to find this sendmsg.c file? 
+ (it's in glibc, but otherwise the remark is valid) + s/otherwise/anyway/ diff --git a/open_issues/dde.mdwn b/open_issues/dde.mdwn index 76b80211..9cb31d1c 100644 --- a/open_issues/dde.mdwn +++ b/open_issues/dde.mdwn @@ -512,6 +512,18 @@ After the microkernel devroom at [[community/meetings/FOSDEM_2013]]. hm... good point +## IRC, freenode, #hurd, 2013-09-20 + + i should take some time to integrate my pcap changes into the + libpcap debian package at least + braunr: if upstream is active, i'd say to go there directly + the problem with that approach is that netdde is still not part of + our upstream code + don't understand the relation + i don't want to send the pcap guys code for an interface that is + still not considered upstream ... + + # IRC, freenode, #hurd, 2012-08-14 it's amazing how much code just gets reimplemented needlessly ... @@ -642,3 +654,135 @@ In context of [[libpthread]]. stuff there is but relatively to other improvements, it's low + + +## IRC, freenode, #hurd, 2013-09-14 + + I'm slowly beginning to understand the virtio driver framework + after reading Rusty's virtio paper and the Linux sources of a few virtio + drivers. + Has anyone started working on virtio drivers yet? + rekado: nobody has worked on virtio drivers, as I know of + youpi: I'm still having a hard time figuring out where virtio + would fit in in the hurd. + I'm afraid I don't understand how drivers in the hurd work at all. + Will part of this have to be implemented in Mach? + rekado: it could be implemented either as a Mach driver, or as a + userland driver + better try the second alternative + i.e. as a translator + sitting on e.g. /dev/eth0 or /dev/hd0 + + +## IRC, freenode, #hurd, 2013-09-18 + + To get started with virtio I'd like to write a simple driver for + the entropy device which appears as a PCI device when running qemu with + -device virtio-rng-pci . + why entropy ? + because it's the easiest. + is it ? + the driver itself may be, but integrating it within the system + probably isn't + It uses the virtio framework but only really consists of a + read-only buffer virtqueue + you're likely to want something that can be part of an already + existing subsystem like networking + All the driver has to do is push empty buffers onto the queue and + pass the data it receives back from the host device to the client + The thing about existing subsystems is: I don't really understand + them enough. + I understand virtio, though. + but isn't your goal understanding at least one ? + yes. + then i suggest working on virtio-net + and making it work in netdde + But to write a virtio driver for network I must first understand + how to actually talk to the host virtio driver/device. + rekado: why ? + There is still a knowledge gap between what I know about virtio + and what I have learned about the Hurd/Mach. + are you trying to learn about virtio or the hurd ? + both, because I'd like to write virtio drivers for the hurd. + hm no + with virtio drivers pass buffers to queues and then notify the + host. + you may want it, but it's not what's best for the project + oh. + what's best is reusing existing drivers + we're much too far from having enough manpower to maintain our own + you mean porting the linux virtio drivers? + there already is a virtio-net driver in linux 2.6 + so yes, reuse it + the only thing which might be worth it is a gnumach in-kernel + driver for virtio block devices + because currently, we need our boot devices to be supported by the + kernel itself ... 
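+
+(For experimenting along these lines, a hedged qemu invocation that boots the
+stock Debian image with a virtio RNG device attached next to the usual
+emulated hardware; apart from -device virtio-rng-pci, which is taken from the
+session above, the flags are just the common ones for running the image and
+may need adjusting:)
+
+    qemu-system-i386 -m 1G \
+      -drive file=debian-hurd.img,cache=writeback \
+      -net nic,model=rtl8139 -net user \
+      -device virtio-rng-pci
+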
+ when I boot the hurd with qemu and the entropy device I see it as + an unknown PCI device in the output of lspci. + that's just the lspci database which doesn't know it + Well, does this mean that I could actually talk to the device + already? E.g., through libpciaccess? + I'm asking because I don't understand how exactly devices "appear" + on the Hurd. + it's one of the most difficult topic currently + you probably can talk to the device, yes + but there are issues with pci arbitration + * rekado takes notes: "pci arbitration" + so, this is about coordinating bus access, right? + yes + i'm not a pci expert so i can't tell you much more + heh, okay. + what kind of "issues with pci arbitration" are you referring to, + though? + Is this due to something that Mach isn't doing? + ideally, mach doesn't know about pci + the fact we still need in-kernel drivers for pci devices is a big + problem + we may need something like a pci server in userspace + on l4 system it's called an io server + How do in-kernel drivers avoid these issues? + they don't + Or rather: why is it they don't have these issues? + they do + oh. + we had it when youpi added the sata driver + so currently, all drivers need to avoid sharing common interrupts + for example + again, since i'm not an expert about pci, i don't know more about + the details + pci arbitrations are made by hardware ... no ? + Hooligan0: i don't know + i'm not merely talking about bus mastering here + simply preventing drivers from mapping the same physical memory + should be enforced somewhere + i'm not sure it is + same for irq sharing + braunr : is the support for boot devices into the kernel is + really needed if a loader put servers into the memory before starting + mach ? + Hooligan0: there is a chicken-and-egg problem during boot, + whatever the solution + obviously, we can preload from memory, but then you really want + your root file system to use a disk + Hooligan0: the problem with preloading from memory is that you + want the root file system to use a real device + the same way / refers to one on unix + so you have an actual, persistent hierarchy from which the system + can be initialized and translators started + you also want to share as much as possible between the early + programs and the others + so for example, both the disk driver and the root file system + should be able to use the same libc instance + this requires a "switch root" mechanism that needs to be well + defined and robust + otherwise we'd just build our drivers and root fs statically + (which is currently done with rootfs actually) + and this isn't something we're comfortable with + so for now, in-kernel drivers + humm ... disk driver and libc ... i see + in other way ... disk drivers can use only a little number of + lib* functions ; so with a static version, a bit of memory is lots + s/lots/lost + and maybe the driver can be hot-replaced after boot (ok ok, + it's more simple to say than to write) diff --git a/open_issues/device_drivers_and_io_systems.mdwn b/open_issues/device_drivers_and_io_systems.mdwn index 5bda0213..085a737a 100644 --- a/open_issues/device_drivers_and_io_systems.mdwn +++ b/open_issues/device_drivers_and_io_systems.mdwn @@ -92,3 +92,9 @@ Also see [[user-space device drivers]]. 
* OSF Mach * Darwin + + * IRC, freenode, #hurd, 2013-08-26 + + < stargater> in haiku is a layer wraper for bsd driver + < stargater> + https://www.haiku-os.org/news/2007-05-08/haiku_getting_a_freebsd_network_driver_compatibility_layer diff --git a/open_issues/exec.mdwn b/open_issues/exec.mdwn index ff3fccf5..fe70123d 100644 --- a/open_issues/exec.mdwn +++ b/open_issues/exec.mdwn @@ -10,7 +10,10 @@ License|/fdl]]."]]"""]] [[!tag open_issue_hurd]] -IRC, unknown channel, unknown date. +[[!toc]] + + +# IRC, unknown channel, unknown date. oh my, disabling gzip/bzip2 support makes apt preconfigure hang support in exec* I meant @@ -18,6 +21,50 @@ IRC, unknown channel, unknown date. now a funny bug: if I disable gzip/bzip2 support from exec trying to run a zero-byte file hangs + +## IRC, freenode, #hurd, 2013-08-01 + + uh, all the non trivial exec server code has #ifdef'd BFD code + all over it and it looks like that isn't even used anymore + that's too bad actually, I figured out how to get the values + from BFD, not so for the other elf parser that is used instead + + +## IRC, freenode, #hurd, 2013-08-05 + + btw, there is a Debian bug concerning zipped executables. now + I'm not sure if I understood the problem, but gziped and bzip2ed + executables work for me + (not that I'm a big fan of that particular feature) + iirc these somehow got fixed yes + something like a previous out of bound access + the exec server contains lot's of code that is unused and + probably bit rot (#ifdef BFD) or otherwise ignored (#if 0) + yes :/ + and there's gunzipping and bunzip2ing, which we probably don't + want anyway + why not? + we should strip all that from exec and start adding features + pinotree: b/c it's slow and the gain is questionable + it breaks mmapping the code in + exec/exec.c is huge (~2300 lines) and complex and it is an + essential server + and I wonder if the unzipping is done securely, e. g. if it's + not possible to crash exec with an maliciously compressed executable + + +## IRC, freenode, #hurd, 2013-09-12 + + The zip code in hurd/exec/ looks really complicated; does it + really just unpack zipped files in memory (which could be replaced by + library calls) or is there something else going on? + rekado: + http://lists.gnu.org/archive/html/bug-hurd/2013-08/msg00049.html + braunr: interesting. Thanks. + Does this mean that the "small hack entry" on the contributing + page to use libz and libbz2 in exec is no longer valid? + probably + --- May want to have a look at using BFD / libiberty/simpleobject. diff --git a/open_issues/exec_leak.mdwn b/open_issues/exec_leak.mdwn deleted file mode 100644 index b58d2c81..00000000 --- a/open_issues/exec_leak.mdwn +++ /dev/null @@ -1,57 +0,0 @@ -[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]] - -[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable -id="license" text="Permission is granted to copy, distribute and/or modify this -document under the terms of the GNU Free Documentation License, Version 1.2 or -any later version published by the Free Software Foundation; with no Invariant -Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license -is included in the section entitled [[GNU Free Documentation -License|/fdl]]."]]"""]] - -[[!tag open_issue_hurd]] - - -# IRC, freenode, #hurd, 2012-08-11 - - the exec servers seems to leak a lot - server* - exec now uses 109M on darnassus - it really leaks a lot - only 109mb? 
few months ago, exec on exodar was taking more than - 200mb after few days of uptime with builds done - i wonder how much it takes on the buildds - - -# IRC, freenode, #hurd, 2012-08-17 - - the exec leak is tricky - bddebian: btw, look at the TODO file in the hurd source code - bddebian: there is a not from thomas bushnell about that - "*** Handle dead name notifications on execserver ports. ! - not sure it's still a todo item, but it might be worth checking - braunr: diskfs_execboot_class = ports_create_class (0, 0); - This is what would need to change right? It should call some cleanup - routine in the first argument? - Would be ideal if it could just use deadboot() from exec. - bddebian: possible - bddebian: hum execboot, i'm not so sure - Execboot is the exec task, no? - i don't know what execboot is - It's from libdiskfs - but "diskfs_execboot_class" looks like a class of ports used at - startup only - ah - then it's something run in the diskfs users ? - yes - the leak is in exec - if clients misbehave, it shouldn't affect that server - That's a different issue, this was about the TODO thing - ah - i don't know - Me either :) - For the leak I'm still focusing on do-bunzip2 but I am baffled - at my results.. - ? - Where my counters are zero if I always increment on different - vars but wild freaking numbers if I increment on malloc and decrement on - free diff --git a/open_issues/exec_memory_leaks.mdwn b/open_issues/exec_memory_leaks.mdwn index d504c4f0..67281bdc 100644 --- a/open_issues/exec_memory_leaks.mdwn +++ b/open_issues/exec_memory_leaks.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -12,8 +12,56 @@ License|/fdl]]."]]"""]] There are is some memory leak in [[`exec`|hurd/translator/exec]]. +[[!toc]] -# I + +# IRC, freenode, #hurd, 2012-08-11 + + the exec servers seems to leak a lot + server* + exec now uses 109M on darnassus + it really leaks a lot + only 109mb? few months ago, exec on exodar was taking more than + 200mb after few days of uptime with builds done + i wonder how much it takes on the buildds + + +## IRC, freenode, #hurd, 2012-08-17 + + the exec leak is tricky + bddebian: btw, look at the TODO file in the hurd source code + bddebian: there is a not from thomas bushnell about that + "*** Handle dead name notifications on execserver ports. ! + not sure it's still a todo item, but it might be worth checking + braunr: diskfs_execboot_class = ports_create_class (0, 0); + This is what would need to change right? It should call some cleanup + routine in the first argument? + Would be ideal if it could just use deadboot() from exec. + bddebian: possible + bddebian: hum execboot, i'm not so sure + Execboot is the exec task, no? + i don't know what execboot is + It's from libdiskfs + but "diskfs_execboot_class" looks like a class of ports used at + startup only + ah + then it's something run in the diskfs users ? + yes + the leak is in exec + if clients misbehave, it shouldn't affect that server + That's a different issue, this was about the TODO thing + ah + i don't know + Me either :) + For the leak I'm still focusing on do-bunzip2 but I am baffled + at my results.. + ? 
+ Where my counters are zero if I always increment on different + vars but wild freaking numbers if I increment on malloc and decrement on + free + + +# 2012-11-25 After twelve hours worth of `fork/exec` ([[GCC]]'s `check-c` part of the testsuite), we got: @@ -29,7 +77,7 @@ quite noticeable. In comparison: 276 0 3 1 1 344 442M 28.2M 0.6 48:09.36 91min /hurd/ext2fs /dev/hd2s5 -# II +# 2012-12-20 After running the libtool testsuite for some time: diff --git a/open_issues/fakeroot_eagain.mdwn b/open_issues/fakeroot_eagain.mdwn index 6b684a04..168ddf7d 100644 --- a/open_issues/fakeroot_eagain.mdwn +++ b/open_issues/fakeroot_eagain.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -132,7 +132,7 @@ License|/fdl]]."]]"""]] or rather, a lot more (or maybe not, since it leaks only in some cases) -[[exec_leak]]. +[[exec_memory_leaks]]. pinotree: actually, the behaviour under linux is the same with the alternative correctly set, whereas faked-tcp is restarted (if used at diff --git a/open_issues/gccgo.mdwn b/open_issues/gccgo.mdwn index 9e724b95..a3c0e1d1 100644 --- a/open_issues/gccgo.mdwn +++ b/open_issues/gccgo.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -38,6 +38,15 @@ been working on this, has some (unpublished) patches, and this was blocked on [[`getcontext`/`makecontext`/`setcontext`/`swapcontext`|glibc]]. +### IRC, freenode, #hurd, 2013-08-26 + + < gnu_srs> tschwinge: on + http://www.gnu.org/software/hurd/open_issues/gccgo.html you might change + the text, my patches are published + < gnu_srs> http://lists.gnu.org/archive/html/bug-hurd/2013-06/msg00050.html + to msg00052.html + + ## `getcontext`/`makecontext`/`setcontext`/`swapcontext` usage analysis In context of [[glibc/t/tls-threadvar]]. Looking at GCC trunk commit diff --git a/open_issues/gdb.mdwn b/open_issues/gdb.mdwn index 67a38e96..aec797ee 100644 --- a/open_issues/gdb.mdwn +++ b/open_issues/gdb.mdwn @@ -355,6 +355,18 @@ like `gdb/testsuite/boards/cc-with-tweaks.exp` would help, or setting Cannot access memory at address 0x6c62616e (gdb) testcase ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/attach-pie-noexec.exp completed in 3 seconds + IRC, freenode, #hurd, 2013-09-06: + + How to debug a program that works in the shell but Cannot + access memory at address ... in gdb? + Build it without -pie -- but that is just a guess of what + might be going on. + * tschwinge clearly has spent enough time with obscure things to be + able to make such guesses. + tschwinge: looks like -fPIE is used. + verified: some (all?) executables compiled with -fPIE, -fpie + and linked with -pie cannot be debugged in gdb :( + * `solib-event stop` Running ../../../Ferry_Tagscherer/gdb/testsuite/gdb.mi/mi-catch-load.exp ... @@ -540,3 +552,34 @@ like `gdb/testsuite/boards/cc-with-tweaks.exp` would help, or setting related. TODO. 
+ + +# Open Issues + +## [[tag/open_issue_gdb]] + +## `info files` SIGSEGV + +[[!tag open_issue_gdb]] + + +### IRC, freenode, #hurd, 2013-09-07 + + I'm trying to debug pfinet, but I'm not very familiar with gdb. + Tried to attach to the running pfinet process (built with debug symbols), + set a breakpoint and ... when I ran "info files" the process segfaulted. + which process segfaults, pfinet or gdb? + gdb segfaults. + + +## Watchpoints + +[[!tag open_issue_gdb]] + + +### IRC, freenode, #hurd, 2013-09-16 + + tschwinge: Is gdb watch known to fail on hurd? It hangs for me + when logged in via ssh. + gnu_srs: Don't know about GDB's watch command. Are you sure it + is hanging? diff --git a/open_issues/glibc.mdwn b/open_issues/glibc.mdwn index 31437744..b453b44f 100644 --- a/open_issues/glibc.mdwn +++ b/open_issues/glibc.mdwn @@ -281,14 +281,55 @@ Last reviewed up to the [[Git mirror's 0323d08657f111267efa47bd448fbf6cd76befe8 [[glibc_madvise_vs_static_linking]]. + IRC, OFTC, #debian-hurd, 2013-09-09: + + does hurd MADV_DONTNEED or MADV_FREE or none? + http://sources.debian.net/src/jemalloc/3.4.0-1/include/jemalloc/jemalloc_defs.h.in#L239 + seems it builds by defining JEMALLOC_PURGE_MADVISE_DONTNEED + but i don't know what i'm talking about, so it could build with + JEMALLOC_PURGE_MADVISE_FREE as well + + IRC, OFTC, #debian-hurd, 2013-09-10: + + gg0: it implements none, even if it defines DONTNEED (but + not FREE) + + See also: + + gnash (0.8.11~git20130903-1) unstable; urgency=low + + * Git snapshot. + + Embedded jemalloc copy has been replaced by system one. + [...] + - Disable jemalloc on hurd and kfreebsd-*. No longer disabled upstream. + * `msync` Then define `_POSIX_MAPPED_FILES`, `_POSIX_SYNCHRONIZED_IO`. - * `sys/epoll.h` + * `epoll`, `sys/epoll.h` Used by [[wayland]], for example. + IRC, freenode, #hurd, 2013-08-08: + + is there any possible to have kquque/epoll alike + things in hurd? or there is one? + nalaginrut: use select/poll + is it possible to implement epoll? + it is + we don't care enough about it to do it + (for now) + well, since I wrote a server with Guile, and it could + take advantage of epoll, never mind, if there's no, it'll use + select automatically + but if someday someone care about it, I'll be + interested on it + epoll is a scalability improvement over poll + the hurd being full of scalability issues, this one is + clearly not a priority + ok + * `sys/eventfd.h` * `sys/inotify.h` @@ -390,6 +431,429 @@ Last reviewed up to the [[Git mirror's 0323d08657f111267efa47bd448fbf6cd76befe8 libgc, libsigsegv, luatex, mono, nspr, pth, ruby1.8, texlive-bin, uim, and more. + IRC, OFTC, #debian-hurd, 2013-09-08: + + oh, and even ruby2.0 suffers because of fixed-stack + threads + yes, we definitely need to finish fixing it + my current work is in our glibc repo, youpi/tls-threadvar + | *** makecontext: a stack at 0xbc000 with size 0x40000 + is not usable with threadvars + all 8 failing tests with that + maybe we can hand-disable the use of contexts in ruby for + now? + gg0: ↑ :) + after the pseudo-patch i RFCed, i don't deserve to say + anything else about that :) + i mean, feel free to investigate and "fix" ruby2.0 as + above :) + eh maybe i'd just be able to hand-disable failing + thread-related _tests_ :) + i'm still hoping some real developer picks and actually fixes + it, seems it's not enough interesting though + 21:37 < youpi> yes, we definitely need to finish fixing it + afaiu youpi is working on threadvars-tls migration, which + would mean fixing them all. 
i just meant fixing ruby, which would + mean having puppet btw + gg0: "actually fixing" means fixing threadvars-tls + migration + "just fixing" ruby can be done by simply disabling context + use in ruby + + IRC, OFTC, #debian-hurd, 2013-09-10: + + this one fixes make test by disabling context and giving more + time to timing related tests http://paste.debian.net/plain/37977/ + make test-all is another story + gg0: AIUI, the sleep part should get fixed by the next + glibc upload, which will include the getclk patch + but the disabling context part could be good to submit to + the debian ruby package, mentioning that this is a workaround for + now + unfortunately still not enough, test-all still fails + does it make the package not build? + test-all is the second part of what we call tests + they build and package (they produce all ruby packages), + after that they run debian/run-test-suites.bash which is make + test + make test-all + well after or during the build doesn't matter, it's their + testsuite + ok just failed: + TestBug4409#test_bug4409 = Illegal instruction + make: *** [yes-test-all] Error 132 + what to do with Illegal instruction? + just found 2 words that make everybody shut up :p + same as above: debug it + gg0: have you confirmed that this is reproducible? I've + once had a process die with SIGILL and it was not and I figured + it might have been a (qemu?) glitch + seems i'm running tests which are disabled on _all_ archs, + better so + well, this should be reproducible. i just got it on a qemu, i + could try to reproduce it on real hardware but as just said, i + was testing tests disabled by maintainer so completely useless + gg0: yeah, I'm running all my hurd instances on qemu/kvm + as well, I meant did you get this twice in a row? + to be honest i got another illegal instruction months ago but + don't recall doing what + nope not twice, i've commented it out. then run the remaining + and then found out i should not have done what i was doing + but i could try to reproduce it + ok now i recall i got it another one few hours ago on real + hardware, from logs: + TestIO#test_copy_stream_socket = Illegal instruction + teythoon: on real hardware though + and this is the one i should debug once it finishes, still + running + + IRC, freenode, #hurd, 2013-09-11: + + ../sysdeps/mach/hurd/jmp-unwind.c:53: _longjmp_unwind: + Assertion `! __spin_lock_locked (&ss->critical_section_lock)' + failed. + and + ../libpthread/sysdeps/mach/pt-thread-halt.c:51: + __pthread_thread_halt: Unexpected error: (ipc/send) invalid + destination port. + gg0_: Which libpthread source are these? Stock Debian + package? + tschwinge: everything debian, ruby rebuilt with + http://paste.debian.net/plain/38519/ which should disable + *context + + IRC, OFTC, #debian-hurd, 2013-09-11: + + wrt ruby, i'd propose a patch that disables *context and + comments out failed tests (a dozen). most of them are timing + related, don't always fail + if they failed gracefully, we could leave them enabled and + just ignoring testsuite result, but most of them block testsuite + run when fail + anyone against? any better idea (and intention to implement + it? :p)? + youpi: is disabling some tests acceptable? 
^ + it'd be good to at least know what is failing + so as to know what impact hiding these failures will have + remember that hiding bugs usually means getting bitten by + them even harder later :) + many of them use pipes + here the final list, see commented out ones + http://paste.debian.net/plain/38426 + and as said some don't always fails + test_copy_stream_socket uses a socket + note that we can still at least build packages with notest + at least to get the binaries uploaded + disabling *context should however really be done + and the pipe issues are concerning + I don't remember other pipe issues + so maybe it's a but in the ruby bindings + i just remember they didn't die, then something unknown + fixed it + I see something frightening in io.c + #if BSD_STDIO + preserving_errno(fseeko(f, lseek(fileno(f), + (off_t)0, SEEK_CUR), SEEK_SET)); + #endif + this looks very much like a workaround for an odd thing in + BSD + it happens that that gets enabled on hurd too, since + __MACH__ is defined + you could try to drop these three lines, just to see + this is very probably very worth investigating, at any rate + even just test_gets_limit_extra_arg is a very simple test, + that I fail to see why it should ever fail on hurd-i386 + starting debugging it would be a matter of putting printfs + in io.c, to check what gets called, with what parameters, etc. + just a matter of taking the time to do it, it's not very + complex + youpi: are you looking at 1.8? no BSD_STDIO here + yes, 1.8 + 1.9.3.448 + landed to sid few days ago + ah, I have 1.87 + +. + my favourites are TestIO#test_copy_stream_socket and + TestIO#test_cross_thread_close_fd -> Illegal instruction + TestIO#test_io_select_with_many_files sometimes Illegal + instruction, sometimes ruby1.9.1: + ../sysdeps/mach/hurd/jmp-unwind.c:53: _longjmp_unwind: Assertion + `! __spin_lock_locked (&ss->critical_section_lock)' failed. + + [[thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock]]? + + trying to debug illegal instruction + http://paste.debian.net/plain/38585/ + (yes, i'm not even good at gdbing) + any hint? + oh found out there's an intree .gdbinit, that might + complicate things + + IRC, OFTC, #debian-hurd, 2013-09-13: + + where should it be implemented MAP_STACK? plus, is it worth + doing it considering migration to tls, wouldn't it be useless? + sysdeps/mach/hurd/mmap.c i should reduce stupid questions + frequency from daily to weekly basis + + IRC, OFTC, #debian-hurd, 2013-09-14: + + say i managed to mmap 0x200000-aligned memory + now i get almost the same failed tests i get disabling + *context + that would mean they don't depend on threading + + IRC, freenode, #hurd, 2013-09-16: + + i get many ../sysdeps/mach/hurd/jmp-unwind.c:53: + _longjmp_unwind: Assertion `! __spin_lock_locked + (&ss->critical_section_lock)' failed. + by running ruby testsuite, especially during test_read* tests + http://sources.debian.net/src/ruby1.9.1/1.9.3.448-1/test/ruby/test_io.rb#L972 + read/write operations with pipes + gg0: that's weird + gg0: debian glibc ? 
+ braunr: yep, debian 2.17-92 + sometimes assertion above, sometimes tests in question get + stuck reading + it would be nice reproducing it w/o ruby + probably massive io on pipes could do the job + also more nice finding someone who finds it interesting to + fix :p + ruby is rebuilt with http://paste.debian.net/plain/40755/, no + *context + pipe function in tests above creates one thread for write, + one for read + http://sources.debian.net/src/ruby1.9.1/1.9.3.448-1/test/ruby/test_io.rb#L26 + gg0: About the jmp-unwind assertion failure: is it be + chance this issue: + ? + I didn't look in detail. + tschwinge: that's what i thought too about the assertion, + which is why i find it strange + asserting it's not locked then locking it doesn't exclude + race conditions + + IRC, OFTC, #debian-hurd, 2013-09-17: + + youpi: i guess no one saw it anymore since + tg-thread-cancel.diff patch + it = + http://www.gnu.org/software/hurd/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.html + this one comes from sysdeps/mach/hurd/jmp-unwind.c:53 though + another assertion to remove? + gg0: it's not exactly the same: in hurd_thread_cancel we + hold no lock at all at the assertion point + in jmp-unwind.c, we do hold a lock + and the assertion might be actually true because all other + threads are supposed to hold the first lock before taking the + other one + you could check for that in other places + and maybe it's the other place which wouldhave to be fixed + also look for documentation which would say that + + IRC, freenode, #hurd, 2013-09-17: + + gg0: is that what we do ?? + braunr: well, i was looking at + http://sources.debian.net/src/eglibc/2.17-92/debian/patches/hurd-i386/tg-thread-cancel.diff + which afaics fixes + http://www.gnu.org/software/hurd/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.html + the one i get now is + http://sources.debian.net/src/eglibc/2.17-92/sysdeps/mach/hurd/jmp-unwind.c#L53 + 09:12 < youpi> gg0: it's not exactly the same: in + hurd_thread_cancel we hold no lock at all at the assertion point + 09:13 < youpi> in jmp-unwind.c, we do hold a lock + 09:13 < youpi> and the assertion might be actually true + because all other threads are supposed to hold the first lock + before taking the other one + gg0: that assertion is normal + it says there is a deadlock + ss->critical_section_lock must be taken before ss->lock + you mean ss->lock before ss->critical_section_lock + no + ah ok got it + that's a bug + longjmp + ugh + you could make a pass through the various uses of those + locks and check what the intended locking protocol should be + i inferred ss->critical_section_lock before ss->lock from + hurd_thread_cancel + this might be wrong too but considering this function is + used a lot, i doubt it + (no, i hadn't got it, i was looking at jmp-unwind.c where + lock is before critical_section_lock) + could we get useful info from gdb'ing the assertion? + gg0: Only if you first get an understanding why it is + happening, what you expect to happen instead/why it shall not + happen/etc. Then you can perhaps use GDB to verify that. 
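+
+The locking-order problem diagnosed above is the classic one: two code
+paths taking the same pair of locks in opposite orders.  A generic,
+self-contained illustration (plain pthread mutexes, not the actual
+`ss->lock`/`ss->critical_section_lock` code) of why that eventually goes
+wrong:
+
+    /* Thread A takes the locks in the hurd_thread_cancel order, thread B
+       in the reversed jmp-unwind.c order; sooner or later each one holds
+       the lock the other one wants.  Build with: gcc -pthread  */
+    #include <pthread.h>
+
+    static pthread_mutex_t critical_section_lock = PTHREAD_MUTEX_INITIALIZER;
+    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
+    static void *
+    thread_a (void *arg)
+    {
+      (void) arg;
+      for (;;)
+        {
+          pthread_mutex_lock (&critical_section_lock);
+          pthread_mutex_lock (&lock);
+          pthread_mutex_unlock (&lock);
+          pthread_mutex_unlock (&critical_section_lock);
+        }
+    }
+
+    static void *
+    thread_b (void *arg)
+    {
+      (void) arg;
+      for (;;)
+        {
+          pthread_mutex_lock (&lock);                    /* reversed order */
+          pthread_mutex_lock (&critical_section_lock);
+          pthread_mutex_unlock (&critical_section_lock);
+          pthread_mutex_unlock (&lock);
+        }
+    }
+
+    int
+    main (void)
+    {
+      pthread_t a, b;
+      pthread_create (&a, NULL, thread_a, NULL);
+      pthread_create (&b, NULL, thread_b, NULL);
+      pthread_join (a, NULL);   /* never returns once both threads block */
+      return 0;
+    }
+
+In the glibc case the assertion in `jmp-unwind.c` fires instead of a
+silent deadlock, but the inconsistent ordering behind it is the same
+problem.
+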
+ i can offer an irc interface if anyone is interested, it's + ready, just to attach :) + this is the test + http://sources.debian.net/src/ruby1.9.1/1.9.3.448-1/test/ruby/test_io.rb#L937 + pipe function creates two threads + http://sources.debian.net/src/ruby1.9.1/1.9.3.448-1/test/ruby/test_io.rb#L26 + Attaching to pid 15552 + [New Thread 15552.1] + [New Thread 15552.2] + (gdb) + + IRC, freenode, #hurd, 2013-09-21: + + gg0: it seems the assert (! __spin_lock_locked + (&ss->critical_section_lock)); is bogus + but it'd be good to catch a call trace + well, it may not be bogus, in case that lock is only ever + taken by the thread itself + in that case, inside longjmp_unwind we're not supposed to + have it already + ok, that's what we had tried to discuss with Roland + it can happen when playing with thread cancelation + youpi: the assertion isn't exactly bogus + the lock ordering is + braunr: which one are you talking about? + the one in hurd_thread_cancel looks really wrong + and some parts of the code keep the critical section lock + without ss->lock held, so I don't see how lock ordering can help + + IRC, OFTC, #debian-hurd, 2013-09-22: + + how much does this patch suck on a scale from 1 to 10? + http://paste.debian.net/plain/44810/ + well, the stack allocation issue will go away once I get + the threadvars away + I'm working on it right now + about the lib paths, it makes sense to add the gnu case, + but i386-gnu shouldn't be put in the path + that's great + so seems the wrong moment for what i've already done + ie. asking terceiro what he thinks about patch above :/ + any distro-independent way to get libc.so and libm.so path? + ruby as last resource takes them from "ldd ruby" + gg0: should work fine then + well it does. but gnu doesn't have a case so it hits default + which is broken + http://bugs.ruby-lang.org/projects/ruby-trunk/repository/revisions/40235/entry/test/dl/test_base.rb + btw even linux and kfreebsd with debian multipath have broken + cases but they don't hit default and get fixed by ldd later + why it is broken? are arguments passed to that script? + i'm not sure about what propose. a broken case so it doesn't + hit default like linux and kfbsd + yes they are :/ + and which ones are? who executes that script and which + arguments does it pass to it? + other ruby scripts which have nothing to do with libc/libm + well, if they pass arguments which should be the paths + to libc and libm, they must be getting such paths, aren't they? + they don't. arguments are other ruby scripts, don't know why, + maybe something else broken before + but that would mean that before there's a smarter path + detection way, i doubt + then add the case for hurd, but setting both libc and + libm as nil + so they will be fetched again + yep and would really ugly + +be + "please commit this one which wrongly sets paths." 
+ an alternative would be removing default case + or pointing it out by proposing ldd in hurd case might make + them review the whole detection + by setting correct paths like in patch above it wouldn't + break a possible hurd-amd64, it would work due to ldd + gg0: that's why I said the patch is fine, but without the + i386-gnu part of the path + just like it happens to be on linux & kfreebsd + i might take ldconfig -p output + to make it uselessly correct from start + http://bugs.ruby-lang.org/issues/8937 + note thar ruby 1.8 is EOL + *that + -- If you're reporting a bug in both Ruby 1.9/2.0 and Ruby + 1.8: ruby-trunk, and write like "this bug can be reproduced in + Ruby 1.8 as well." -- + i suspect this one won't be the only one i'll file. unless + upcoming youpi's tls and braunr's thread destruction patches fix + all ruby tests + did you check ruby2.0 too, btw? + switched to ruby2 few hours ago. i pointed out 2nd part of + testsuite is not enabled, probably terceiro will enable it soon + by applying my patch above we'd completely fix current + ruby2.0 build (yes because tests are not completely enabled) + what you run those extra tests? + + http://anonscm.debian.org/gitweb/?p=collab-maint/ruby1.9.1.git;a=blob;f=debian/run-test-suites.bash + make test + make test-all + (test-all is 2nd part) + many are problematic. i didn't finish yet to suppress them + one-by-one. one i suppress, another one pops up + either get stuck or well known assertion + check those that get stuck :) + which kind of check? + "check" as in "debug" + btw i tested puppet few days ago (with ruby1.8), it seems to + be working, at least at trasferring files from master + don't know about any advanced usage + ruby 1.8 is going to die soon, so testing things against + it is not totally useful + so you assume 1.8 is less broken than 1.9/2.0, right? + no + i just can see it's been built without tests itself too + erm no + well ok, if i can be wrong, i'll be wrong + i say that after a quick check time ago, might be wrong + `getbuildlogs ruby1.8 last hurd-i386`, see the last + build log + ah from pkg-kde-tools + i hate kde :) + no? + no what? + devscripts: /usr/bin/getbuildlog + pkg-kde-tools: /usr/bin/pkgkde-getbuildlogs + which is not what i said + wait that's what apt-file found + maybe i should update it + is it so recent? + no + i just added an 's' more at the end of the command, but + typing getbu could have been helpful anyway... + yeah just got it + my fault not to have tried to run it before looking for it + and btw, i don't see what hating kde has to do with + tools developed by qt/kde debian packagers + j/k i simply don't use kde, never used and apt-file search + told me it was from pkg-kde-tools + btw build log says "make test" fails, doesn't even start. and + its failure doesn't block the build + exactly + s/make test/make test-all/ + "make test" (aka "1st part" above) doesn't run. i guess it's + missing in packaging + + IRC, freenode, #hurd, 2013-09-22: + + youpi: i mean the lock order where the assertion occurs is + reserved compared to the one in hurd_thread_cancel + (and the one in hurd_thread_cancel is the same used in + hurd condition-related functions) + "reserved" ? + reversed + :) + by "the assertion occurs", you mean gg0's spot? + yes + well , the assertion also happens in hurd_thread_cancel + it does oO + i didn't see that + but otherwise yes, it's completely bogus that we have both + locking in different orders + could you submit the fix for jmp-unwind.c to upstream? + what fix ? 
+ reversing the lock order + ah, simply that + (well, provided that hurd_thread_cancel is right) + that's what i suggested to gg0 + to check where those locks are held and determine the + right order + * `recvmmsg`/`sendmmsg` (`t/sendmmsg`) From [[!message-id "20120625233206.C000A2C06F@topped-with-meat.com"]], @@ -401,13 +865,140 @@ Last reviewed up to the [[Git mirror's 0323d08657f111267efa47bd448fbf6cd76befe8 Then perhaps the Linux fallback case should be that instead of stubs, too.* + * `SOCK_CLOEXEC` + + IRC, freenode, #hurd, 2013-09-02: + + Do we support accept4 with the SOCK_CLOEXEC flag? + According to the code in sysdeps/mach/hurd/accept4.c + that case is not covered + (only O_NONBLOCK, not SOCK_NONBLOCK??)) + gnu_srs1: we do + but only for accept4, not for socket and socketpair + pinotree: cannot find the case for O_CLOEXEC covered in + __libc_accept4() + gnu_srs1: no, you need SOCK_* + The only code for accept4() is in sysdeps/mach/hurd/ and + it uses O_* for flags ? + flags = sock_to_o_flags (flags); + tried checking it? + Aha, tks:-D + and you don't need an explicit case of O_CLOEXEC, since + it is handled in other ways + + [[!message-id "1378154151.21738.15.camel@G3620.my.own.domain"]]. + + IRC, freenode, #hurd, 2013-09-03: + + any ideas about the SOCK_CLOEXEC issue? + didn't i tell already about it? + I did not find any hurd related code in tschwinges + branches. + you didn't check deep then... + so why does socket/socketpair not return ENOSYS then? + why should it, since they are implemented? + ... + for socket/socketpair? + gnu_srs: enosys means no system call + s/ENOSYS/EINVAL/ + see the mail to the bug-hurd/debian-hurd ML for more info + and tschwinges reply + which is what i knew already? + pinotree: please reply on the mailing list on the EINVAL + vs EPROTOTYPE issue to clarify things + gnu_srs: + https://sourceware.org/ml/libc-alpha/2013-02/msg00092.html + gnu_srs: things were clear already... + pinotree: I've read that patch and still pflocal/pf.c + returns EPROTOTYPE not changed by the __socket wrapper in eglibc + gnu_srs: what about realizing SOCK_CLOEXEC and friends + are NOT posix? + since socket/socketpair does not return EINVAL the dbus + code has to be patched then? + pflocal should never ever get such flags mixed to the + protocol, so any invalid value of protocol correctly returns + EPROTOTYPE + this is the question I need answered: Which way to go? + all of them + ? + - applications should not assume that because you have + accept4 (which is not posix) then SOCK_CLOEXEC and SOCK_NONBLOCK + (flags for it) are usable to socket and socketpair + - glibc should (as the idea of my patch) handle + implementations providing SOCK_* but not supporting them for + socket/socketpair + - finally the hurd part of glibc could implement them + to conclude: should I send a bug report for dbus then? + pinotree: yes or no? + gnu_srs: *shrug* i wrote it above, so an *upstream* + report (not a debian one) + + IRC, freenode, #hurd, 2013-09-06: + + I've found another error code issue, now in glib2.0 (and + dbus). Are you really sure the error code + for protocol of pflocal/pf.c should be + EPROTONOSUPPORT. The code expects EINVAL for a protocol with + SOCK_CLOEXEC, which is a flag. Maybe pf.c should add + this case and return EINVAL instead of + submitting bug reports upstream. Yes, I know this is not + POSIX, but it is defined for Hurd too, + currently only supported for accept4, not socket or + socketpair. 
+ gnu_srs: no, and i explained already why it is wrong + this way + pflocal shouldn't even get such flags + (pflocal or any other server implementing socket_create) + (20:19:35) pinotree: pflocal shouldn't even get such + flags + then the glibc wrapper code is missing to catch this + flag:( + youpi: ? + gnu_srs: because, as told many times, socket and + socketpair do not support such flags + given they don't do, they filter nothing + and no, you need to file bugs upstream, since having + SOCK_* and accept4 does not imply at all that socket and + socketpair support them + + IRC, freenode, #hurd, 2013-09-07: + + A correction from yesterdays discussion: + s/EPROTONOSUPPORT/EPROTOTYPE + + IRC, freenode, #hurd, 2013-09-10: + + for dbus2.0 I found out that the third SOCK_CLOEXEC case + needs a patch too (more working tests), + the updated patch is at http://paste.debian.net/37948/ if + you have the time, otherwise I'll do it. + gnu_srs: which is what i wrote in my bug report... + Yes you wrote that, but the patch is not updated yet? + it refers to a different socket access, recently added, + which is not used by default + I got two more tests running when adding that patch:-/ + tests of what? + run-test.sh and run-test-systemserver.sh:P + tests of what? + i don't have the universal knowledge of the files in all + the sources + dbus-1.6.14/test/name-test/* + + [[!message-id "523A3D6C.2030200@gmx.de"]]. + + IRC, OFTC, #debian-hurd, 2013-09-19: + + tschwinge: ehm, regarding the SOCK_* patches for + socket/socketpair, didn't we talk about them when i worked on + eglibc 2.17? + For specific packages: * [[octave]] * Create `t/cleanup_kernel-features.h`. - * Add tests from Linux kernel commit messages for `t/dup3` et al. + * [[Secure_file_descriptor_handling]]. * In `sysdeps/unix/sysv/linux/Makefile`, there are a bunch of `-DHAVE_SENDFILE` -- but we do have `sendfile`, too. @@ -927,6 +1518,31 @@ Last reviewed up to the [[Git mirror's 0323d08657f111267efa47bd448fbf6cd76befe8 Yeah, that's known for years... :-D Probably not too difficult to resolve, though. + * IRC, OFTC, #debian-hurd, 2013-08-16: + + http://paste.debian.net/25934/ ← _hurd_thread_sigstate calls + malloc, boom + + * `conformtest` + + IRC, OFTC, #debian-hurd, 2013-09-22: + + btw, I noticed that glibc has a head conformance test which we + happily fail quite a bit :) + it's not so awful, we don't have twice as many failures as + linux, but not so far + youpi: do you mean "header" for "head", right? + err, where ? :) + btw, I noticed that glibc has a head conformance + test which we happily fail quite a bit :) + ah, yes + noticed that too + I had a quick look at the POSIX part, some things are probably + not too hard to change (e.g. exposing pthread_kill in signal.h) + others will by quite hard to fix (short type instead of int + type for some flock structure field) + s/by/be/ + * Verify baseline changes, if we need any follow-up changes: * a11ec63713ea3903c482dc907a108be404191a02 @@ -1253,6 +1869,13 @@ TODO. [[!message-id "20120723195143.7F8142C0B9@topped-with-meat.com"]]. + IRC, freenode, #hurd, 2013-08-27: + + < gnu_srs> Hi, is this fixed by now? 
+ < gnu_srs> ../hurd/hurd.h:72:5: warning: case value ‘0’ not in + enumerated type ‘error_t’ [-Wswitch] + < pinotree> nope + * baseline fd5bdc0924e0cfd1688b632068c1b26f3b0c88da..2ba92745c36eb3c3f3af0ce1b0aebd255c63a13b (or probably Samuel's mmap backport) introduces: diff --git a/open_issues/glibc/debian.mdwn b/open_issues/glibc/debian.mdwn index 9886ec98..2ef2c474 100644 --- a/open_issues/glibc/debian.mdwn +++ b/open_issues/glibc/debian.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -24,6 +24,43 @@ locale stuff. `--disable-compatible-utmp`? +## IRC, freenode, #hurd, 2013-08-28 + + uh, the i686 profiles have much more progression than i386 + it seems they don't actually run these + youpi: what do you mean with "we don't run those"? + iirc there are three build profiles done, but there are 4 + regression test files + yes, but some failing tests are not run in the three build profiles + even if they are built for all of them + not even run? which ones? + see for instance test-ifloat.out + test-ifloat is built in all profiles, but only run in the libc one + don't have a glibc built tree around atm, sorry :/ + perhaps because glibc thinks it's not useful to run it again if it + fails on i386 + you can check the logs + do you think glibc's build system is that smart? :) + all the builds are done in separate builddirs, so theorically + they should not touch each other... + yes + that's why I'm surprised + could it be they get not run in optimized/particular builds? + what about linux/kfreebsd i386? + I don't see what makes them not run + or at least be treated particularly by th eMakefile + not run on kfreebsd either + pinotree: also, most of the tests now working have been marked as + failing by your patches for 2.17, would it be possible to retry them on + the box you used at that time? + that's the vm on my machine + which kind of vm? + kvm? + y + they are working here + with kvm + + # Building Run `debian/rules patch` to apply patches (instead of having it done during the diff --git a/open_issues/glibc/t/tls-threadvar.mdwn b/open_issues/glibc/t/tls-threadvar.mdwn index 609d866b..7ce36f41 100644 --- a/open_issues/glibc/t/tls-threadvar.mdwn +++ b/open_issues/glibc/t/tls-threadvar.mdwn @@ -76,3 +76,43 @@ dropped altogether, and `__thread` directly be used in glibc. 
tschwinge: yes, there were a lot other occurences of threadvars stuff in various places I'm building libc again, and will see what issue would remain + + +## IRC, freenode, #hurd, 2013-07-12 + + braunr: about the per-thread ports, there is also the mig reply + port + (stored in _HURD_THREADVAR_MIG_REPLY) + + +## IRC, freenode, #hurd, 2013-07-15 + + and with the branch youpi pushed where he removes threadvars, it + shouldn't get "too" hard + (save for the tricky bugs you may encounter) + well, that branch is not working yet + + +## IRC, OFTC, #debian-hurd, 2013-09-22 + + I'm currently tracking bugs with my threadvars changes + some of them seem fine, others, not + of course the most complex ones are the most probable culprits for + the issues I'm getting + fortunately they're after the process bootstrap + so basically that works + just a few dozen tests fail + mostly about loading .so or raising signals + dlopen("bug-dlsym1-lib1.so"): bug-dlsym1-lib1.so: cannot open + shared object file: Function not implemented + after having changed errno a bit + doesn't that look odd ? :) + good, I found an issue with the sigstate + now running testsuite again, to see whether there are other issues + with it :) + s/sigstate/reply_port/ actually + + +## IRC, OFTC, #debian-hurd, 2013-09-23 + + yay, errno threadvar conversion success diff --git a/open_issues/hurd_init.mdwn b/open_issues/hurd_init.mdwn new file mode 100644 index 00000000..b0b58a70 --- /dev/null +++ b/open_issues/hurd_init.mdwn @@ -0,0 +1,216 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_hurd]] + + +# [[!message-id "20130625154749.17799.36923@thinkbox.jade-hamburg.de"]] + + +## IRC, freenode, #hurd, 2013-07-22 + + ok, so back to the drawing board for the next big issue, the + potential proc and init merge + Roland had some harsh words for that proposal, but noone else + raised concerns + noone else does not mean much + I guess only Roland actually understands the matter + so I'd tend to believe him + even though, his criticism was so superficial, he could at least + be a bit more specific... + i agree that the argument, being simply based on vague principle, + isn't very convincing + so, what should I do? + you can either keep them separate, or fight with roland + common braunr, I need a little more guidance in these kind of + social issues + a statement like this is of little use ;) + that's the best i can give you + :/ + i have one patch "fixing" HZ on the hurd, and i even get to fight + about it + I understand Roland has been around forever and keeps an eye on + stuff + but could/would he block a patch for hurd if e.g. youpi would + accept it + i.e. how much control has he in practice? + me fighting with him over a patch is of little value for anyone + and I don't care to do so + not much i suppose now + but we also have to agree with the change + with *real* arguments + (well, if it was up to me, i'd even merge exec with proc so ..) 
+ ok, so I whip up a patch to see how it goes in practice and + present it so we could talk about the issue with something to look at + first + although maybe not ;p + you'll hit the same reaction + from Roland? + yes + and youpi said he tends to trust what roland says + so let's discuss the pros and cons a bit more + yes, but I'd honor his concerns if they were properly + presented. just telling me to hack on linux instead even though I think I + have demonstrated that I do want to work on Hurd is so childish in my + eyes that I do not consider that a valid argument at the moment + sure, shoot + well, functionally, they're unrelated + head -n1 init/init.c + /* Start and maintain hurd core servers and system run state + and thus it makes sense to make them separate, even if it does not + seem to bring anything useful now + history has shown that it makes a bed for nice things later + teythoon: that's not what proc is about + braunr: I know + braunr: that's what init is about in its own words ;) + teythoon: also, "simplifying the code" is not necessarily an + argument that would be considered + depending on the simplification + linux made it all simple by using a monolithic kernel :) + separating concerns is complex + but in the end it usually pays off on the Hurd + personally, I'd be fine with Guillem's solution, and renumbering + init's pid in Debian + there's a pending question from Roland actually: what information + is exchanged between init and proc in the end? + that's actually the point of the discussion: is that information + really big or not + I'm sorry, you lost me, where did he ask that question? + $ git grep proc_getmsgport | egrep '[0-9]' ← /hurd/init as pid 1 + is hardcoded in few places + teythoon: he didn't ask it this way, but that's the question I had + to be able to answer his + Date: Mon, 15 Jul 2013 10:36:35 -0700 (PDT) + > That's not what he said. He said there is a lot of information + > propagated from init to proc, and thus the separation is + questionable. + Are you talking about bootstrap, or what? + as I haven't investigated much, I couldn't answer this + pinotree: right. We could patch these in Debian + youpi: so, shall I refresh, test and refine Guillems patch and + resend it? + it's probably an easier way + ok, I start by doing that + + +## IRC, freenode, #hurd, 2013-07-25 + + pinotree: btw, there are two /sbin/init processes even with my + hacked up init/proc variant where /sbin/init gets to be pid 1 + never seen that + what are their parents? + pinotree: well, pid 1 is /sbin/init now, pid 13 or something has + the parent 1 + looks like init forks or something + i guess your sysvinit is compiled without INITDEBUG? + nothing in syslog either? + pinotree: it's compiled like the sysvinit shipped with debian + teythoon: do you have custom additions in inittab? + pinotree: a terminal for my serial console + *getty + are the getty started correctly for you, btw? 
+ pinotree: yes + interesting + teythoon: back then, they were costantly respawning, with hurd's + getty's failing to start when exec'ed by (sysv)init + wonder what changed + pinotree: cool, magically went away then :) + + +## IRC, freenode, #hurd, 2013-07-29 + + youpi: I need some feedback on the not freezing translators + issue, more specifically whether I understood you correctly in your mail + from wednesday (20130724131552.GG9576@type.bordeaux.inria.fr) + oh yeah, and I had some questions yesterday too, about rpctrace + and dead-name notifications, specifically why /hurd/init is not receiving + any for the root translator and the exec server + teythoon: more details please + ok, so /hurd/init is registering for dead name notifications for + essential tasks + the rootfs and exec both register as essential tasks at init and + init requests successfully dead name notifications for them + if you e.g. kill the auth server, /hurd/init will notice and + crash the system + if you kill exec or the rootfs, /hurd/init does not get notified + I verified this with gdb and an subhurd + I'm puzzled by this, as the kernel is the one who sends the + notifications, right? + yes + teythoon: where is the problem ? + and it is not that the system is not sending any messages, it + is, I see the msgcount increase over time + braunr: dunno, as far as I can tell the kernel does not deliver + the notification for rootfs and exec + oh + those are the two processes loaded by grub, maybe they are + different somehow + is that affecting your work ? + no, not directly, I strayed around at the weekend, trying to + think of cool stuff hurd could do + youpi: I need some feedback on the not freezing translators + issue, more specifically whether I understood you correctly in your mail + from wednesday (20130724131552.GG9576@type.bordeaux.inria.fr) + teythoon: ok, now I'm available for the not-freezing-translators + thing :) + + +## IRC, freenode, #hurd, 2013-08-05 + + youpi: I'm in the process of producing a unified + sysvinit-as-pid1 and please-dont-kill-important-processes patch series + youpi: there is one issue with changing /hurd/inits pid, libcs + reboot() also assumes that it has the pid 1 + argl + that's bad, because it's then an ABI, not just an internal thing + hardcoding the pid is the worst way of getting a handle of any + server :/ + I've been thinking to make it explicit by binding it to + /servers/startup or something + that would be more hurdish than using a pid, yes + yes, and not only does it break the abi, but in a bad way + too. if the libc is updated before the hurd, the shutdown sequence is + broken in a way that the translators aren't synced :/ + youpi: as a workaround, we could make reboot() signal both pid 1 + and 2 + at worse pid 1 shouldn't get harmed by receiving a startup_reboot + RPC indeed + yes + + +## IRC, freenode, #hurd, 2013-08-16 + + grml, the procfs hardcodes the kernels pid :/ + there's always one more thing to fix... + uh, and we made pids.h a private header, so no nice constant for + the procfs translator :/ + server lookup by hardcoding the pid should be banned... 
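+
+Looking a server up through the filesystem instead of through a
+well-known pid, for instance via the `/servers/startup` node proposed
+above, would look roughly like this (untested sketch; such a node does
+not exist yet):
+
+    #include <hurd.h>   /* file_name_lookup */
+    #include <mach.h>
+
+    /* Get a send right to the startup server without assuming it is pid 1.
+       "/servers/startup" is only the name proposed in the discussion.  */
+    static mach_port_t
+    get_startup_server (void)
+    {
+      mach_port_t server = file_name_lookup ("/servers/startup", 0, 0);
+      /* MACH_PORT_NULL means the node is absent or inaccessible; callers
+         would have to fall back or fail gracefully.  */
+      return server;
+    }
+
+The same pattern is already used for `/servers/exec` and
+`/servers/socket/*`, so no new mechanism would be needed.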
+ + +## IRC, freenode, #hurd, 2013-09-16 + + youpi: I'm thinking about splitting /hurd/init into /hurd/init + and /hurd/startup + that way, you could also merge the init as pid1 patches + that should be doable within the week + that would probably be better received by Roland than merging init + into proc :) + yes, I suppose so :D + perhaps you should start the discussion on the list about it + already, with just a sketch of which would do what + ok + fwiw I like the name startup b/c it speaks the startup protocol + teythoon: +1 startup + + +## IRC, freenode, #hurd, 2013-09-23 + + I've been hacking on init/startup, I've looked into cleaning it + up diff --git a/open_issues/libnetfs_argument_parsing.mdwn b/open_issues/libnetfs_argument_parsing.mdwn deleted file mode 100644 index e1e0d794..00000000 --- a/open_issues/libnetfs_argument_parsing.mdwn +++ /dev/null @@ -1,62 +0,0 @@ -[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] - -[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable -id="license" text="Permission is granted to copy, distribute and/or modify this -document under the terms of the GNU Free Documentation License, Version 1.2 or -any later version published by the Free Software Foundation; with no Invariant -Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license -is included in the section entitled [[GNU Free Documentation -License|/fdl]]."]]"""]] - -[[!tag open_issue_hurd]] - -# IRC, freenode, #hurd, 2013-06-27 - - the arg parsing in libdiskfs and libnetfs differ :/ - afaics libdiskfs gets it right, libnetfs does not - what do you mean? - wrt to *_std_{runtime,startup}_argp - see eg netfs.h - libdiskfs/opts-std-runtime.c:const struct argp - diskfs_std_runtime_argp = - libdiskfs/opts-std-runtime.c-{ - libdiskfs/opts-std-runtime.c- std_runtime_options, parse_opt, - 0, 0, children - libdiskfs/opts-std-runtime.c-}; - but - libnetfs/std-runtime-argp.c:const struct argp - netfs_std_runtime_argp = { 0 }; - well there are no common startup/runtime options provided by - netfs - usually netfs-based translators put netfs_std_startup_argp as - child as their options, so if netfs starts providing options they would - work - ah - if you have a test showing issues, we can certainly look it :) - ok, m/b I was confused... - no worries, feel free to ask anytime - I thought about providing --update as common runtime flag, like - diskfs does, you think it's the right thing to do? - what would it do? - or should it be left for each translator to implement? 
- nothing by default I guess - options provided in libdiskfs are implemented and handled mostly - in libdiskfs itself - so imho a new option for libnetfs would be there because its - behaviour is implemented mostly within libnetfs itself - libdiskfs calls diskfs_reload_global_state - libnetfs could do the same, allowing translators to plug in - anything they wish - but I'll implement it in procfs for the time being - ah, its alias is remount - yes - I need that working for procfs - btw, I think I got your mount confusion thing figured out - but procfs has nothing to update/flush, all the information are - fetched at every rpc - yes - but we still need to ignore the flag - otherwise the set_options rpc fails - http://paste.debian.net/12938/ - whee, remounting proc works :) - :) diff --git a/open_issues/libnetfs_passive_translators.mdwn b/open_issues/libnetfs_passive_translators.mdwn new file mode 100644 index 00000000..db4c9005 --- /dev/null +++ b/open_issues/libnetfs_passive_translators.mdwn @@ -0,0 +1,55 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_hurd]] + + +# IRC, freenode, #hurd, 2013-07-15 + + is there a libnetfs based translator that supports passive + translators? + I don't see any at the top of my head, only with active ones such + as hostmux + I suspected as much since as far as I can tell libnetfs lacks + some bits to make that even work + teythoon: the problem with passive translators is persistence + well, it's easy to store volatile passive translators in a + libnetfs server + but usually, there is no backing store for them + ext2fs is the only one actually providing space to store their + command line + sure, but at least file_get_translator needs to work so that + procfs can serve a mounts node + silly idea but + don't you want to directly add it to the procfs translator ? + no, I think it's useful on its own + ok + then you may need to add the required support + it even doubles as normal command line tool + yes, I almost got it... or so I hope ;) + ok + also, netfs_get_translator exists, so not supporting that feels + like a bug to me + could also be useful for a potential devfs translator + yes + + uh, the code duplication in lib*fs is really bad :/ + the code is mostly similar, though they have diverged and many + little things are different so diffing them is very noisy + stuff like file names or identifiers + and I cannot figure out why my shiny passive translators are not + started :/ + % showtrans tmp/mounts + /hurd/mtab.fixed / + % wc --bytes tmp/mounts + 0 tmp/mounts + and no mtab translator around either + +[[community/gsoc/project_ideas/mtab/discussion]]. 
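+
+For reference, querying a node's passive translator record, which is
+what `showtrans` does above and what a working `netfs_get_translator`
+callback would have to serve, comes down to a single RPC.  A rough,
+untested sketch of the client side, essentially what `showtrans` does:
+
+    #define _GNU_SOURCE 1
+    #include <hurd.h>      /* file_name_lookup */
+    #include <hurd/fs.h>   /* file_get_translator */
+    #include <fcntl.h>     /* O_NOTRANS */
+    #include <stdio.h>
+    #include <string.h>
+    #include <error.h>
+    #include <errno.h>
+
+    int
+    main (int argc, char **argv)
+    {
+      char buf[1024], *trans = buf;
+      mach_msg_type_number_t len = sizeof buf;
+      error_t err;
+
+      if (argc != 2)
+        error (1, 0, "usage: %s NODE", argv[0]);
+
+      /* O_NOTRANS: look at the node itself, do not start its translator.  */
+      file_t node = file_name_lookup (argv[1], O_NOTRANS, 0);
+      if (node == MACH_PORT_NULL)
+        error (1, errno, "%s", argv[1]);
+
+      err = file_get_translator (node, &trans, &len);
+      if (err)
+        error (1, err, "file_get_translator");
+
+      /* The record is a '\0'-separated argument vector, e.g.
+         "/hurd/mtab.fixed\0/\0"; print it word by word.  */
+      for (char *p = trans; p < trans + len; p += strlen (p) + 1)
+        printf ("%s ", p);
+      putchar ('\n');
+      return 0;
+    }
+
+The server-side counterpart in a libnetfs translator is the
+`netfs_get_translator` callback mentioned above.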
diff --git a/open_issues/libnetfs_vs_libdiskfs.mdwn b/open_issues/libnetfs_vs_libdiskfs.mdwn new file mode 100644 index 00000000..2fcfbea5 --- /dev/null +++ b/open_issues/libnetfs_vs_libdiskfs.mdwn @@ -0,0 +1,118 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_hurd]] + +[[!toc]] + + +# Argument Parsing + +## IRC, freenode, #hurd, 2013-06-27 + + the arg parsing in libdiskfs and libnetfs differ :/ + afaics libdiskfs gets it right, libnetfs does not + what do you mean? + wrt to *_std_{runtime,startup}_argp + see eg netfs.h + libdiskfs/opts-std-runtime.c:const struct argp + diskfs_std_runtime_argp = + libdiskfs/opts-std-runtime.c-{ + libdiskfs/opts-std-runtime.c- std_runtime_options, parse_opt, + 0, 0, children + libdiskfs/opts-std-runtime.c-}; + but + libnetfs/std-runtime-argp.c:const struct argp + netfs_std_runtime_argp = { 0 }; + well there are no common startup/runtime options provided by + netfs + usually netfs-based translators put netfs_std_startup_argp as + child as their options, so if netfs starts providing options they would + work + ah + if you have a test showing issues, we can certainly look it :) + ok, m/b I was confused... + no worries, feel free to ask anytime + I thought about providing --update as common runtime flag, like + diskfs does, you think it's the right thing to do? + what would it do? + or should it be left for each translator to implement? + nothing by default I guess + options provided in libdiskfs are implemented and handled mostly + in libdiskfs itself + so imho a new option for libnetfs would be there because its + behaviour is implemented mostly within libnetfs itself + libdiskfs calls diskfs_reload_global_state + libnetfs could do the same, allowing translators to plug in + anything they wish + but I'll implement it in procfs for the time being + ah, its alias is remount + yes + I need that working for procfs + btw, I think I got your mount confusion thing figured out + but procfs has nothing to update/flush, all the information are + fetched at every rpc + yes + but we still need to ignore the flag + otherwise the set_options rpc fails + http://paste.debian.net/12938/ + whee, remounting proc works :) + :) + + +# IRC, freenode, #hurd, 2013-07-29 + + so, what do you folks think about refactoring libdiskfs and + libnetfs to be more alike? + what do you mean? + ah, I mentioned that in the context of my mtab prototype + 1374247519-26589-1-git-send-email-4winter@informatik.uni-hamburg.de + they are hard to diff against each other b/c they differ in file + names and identifier names + while working on the mtab stuff I encountered stuff that was + implemented in libdiskfs, but never in libnetfs + mostly support for binding translators to libnetfs nodes + teythoon: sure, but looks a little out of scope + braunr: I do not mean now, more in general + ok + teythoon: I wondered about this, too. I don't know if it's + possible to literally merge them (and build the backend-based (libdiskfs) + vs. 
volatile-backend one (libnetfs) based on a pre-processor define or + similar), or just structure the source code (files) in a way such that + »diff -ru libdiskfs/ libnetfs/« gives meaningful results, figuratively + spoken. + tschwinge: my thoughts exactly + + +# IRC, freenode, #hurd, 2013-08-28 + + braunr: do you think another lib*fs would be frowned uppon? I + like the way procfs is structured and that could be refactored and + generalized into a library + i think we need more lib*fs libraries + and better integration + that's one of the strengths in linux + it makes writing file systems very easy + cool :) + now we only need a snappy name, any suggestions? + i don't know what you like specificlaly in procfs + libpseudofs ? + well, it's not perfect, but i like the way you just have to + implement a function for a node and it magically gains the ability to + being read + for example + yes i see + lacks a bit of caching though + no caching for such file systems + indeed + why would you want caching ? + you might have files that don't change at all, or rarely do + the premise is that it's meant for files generated on the fly + but are they big ? diff --git a/open_issues/libpthread.mdwn b/open_issues/libpthread.mdwn index 8e3fde71..0b426884 100644 --- a/open_issues/libpthread.mdwn +++ b/open_issues/libpthread.mdwn @@ -974,7 +974,7 @@ Most of the issues raised on this page has been resolved, a few remain. exec weights 164M eww, we definitely have to fix that leak the leaks are probably due to wrong mmap/munmap usage -[[exec_leak]]. +[[exec_memory_leaks]]. ### IRC, freenode, #hurd, 2012-08-29 diff --git a/open_issues/libpthread/t/fix_have_kernel_resources.mdwn b/open_issues/libpthread/t/fix_have_kernel_resources.mdwn index 4e35161f..6f09ea0d 100644 --- a/open_issues/libpthread/t/fix_have_kernel_resources.mdwn +++ b/open_issues/libpthread/t/fix_have_kernel_resources.mdwn @@ -196,3 +196,220 @@ Address problem mentioned in [[/libpthread]], *Threads' Death*. ## IRC, freenode, #hurd, 2013-07-03 grmbl, i don't want to give up thread destruction .. + + +## IRC, freenode, #hurd, 2013-07-15 + + btw, my work on thread destruction is currently stalled + i don't have much free time right now + + +## IRC, freenode, #hurd, 2013-09-13 + + i think i know why my thread_terminate_deallocate patches leak one + receive port :> + but now i'm not sure of the proper solution + every time a thread is created and destroyed, a receive right is + leaked + i guess it's simply the reply port .. + grmbl + i guess i have to make it a simpleroutine ... + hm too bad, it's not the reply port :( + it's also leaking some memory + it doesn't seem related to my changes though + stacks, rights, and threads are correctly destroyed + some obscure state is left behind + i wonder how exception ports are dealt with + vminfo seems to confirm memory is leaking in the heap + humpf + oh silly me + i don't detach threads + well, detach them ;) + hm worse :p + now i get additional dead names + but it's a step forward + + +## IRC, freenode, #hurd, 2013-09-16 + + that thread port leak is so strange + the leaked port seems to be created when the new thread starts + running + so it looks like a port the kernel would implicitely create + hm could it be a thread-specific reply port ? + ah, yes, there is one of those + how come mach/mig-reply.c in glibc isn't thread-safe ? 
+ it is overriden by sysdeps/mach/hurd/img-reply.c I guess + which uses a threadvar for the mig reply port + oh + talking of which, there is also last_value in + sysdeps/mach/strerror_l.c + strerror_thread_freeres is supposed to get called, but who knows + it does look to be that port + iirc that's the issue which prevents from letting us make threads + exit on idleness? + one of them + ok + maybe the only one, yes + i see memory leaks but they could be related/normal + (i.e. not actual leaks) + on the other hand, i also can't boot a hurd with my patch + but i consider removing such leaks a priority + does anyone know the semantic difference between + __mig_put_reply_port and __mig_dealloc_reply_port ? + i guess __mig_dealloc_reply_port is actually a destruction + operation, right ? + AIUI, dealloc is used when one wants the port not to be reused at + all + because it has been used as a reference for something, and can + still be currently in use + while put_reply would be when we're really done with it, and won't + use it again, and can thus be used as such + or at least something like that + heh + __mig_dealloc_reply_port calls __mach_port_mod_refs, which is a + RPC, and creates a new reply port when destroying the current one + bah + that's fine, it's a deref of the old port, which is not in the + reply_port variable any more + it's fine, but still a leak + well, dealloc does not completely deallocs, yes + that's not really the problem here + i've introduced a case that wasn't considered at the time, namely + that a thread can destroy itself + we probably need another function to be called from the thread exit + i'll simply try with mach_port_destroy + mach_port_destroy seems to be a RPC too ... + grmbl + isn't there a trap version somehow ? + not in libc + erf + at least i know what's wrong now :) + there still is a small memory leak i have to investigate + but outside the stack + the stack, the thread name and the thread are correctly destroyed + slabinfo confirms only one port leak and nothing else is leaked + ok so the port leak was indeed the thread-specific reply port, + taken care of + there are also memory leaks too + + +## IRC, freenode, #hurd, 2013-09-17 + + teythoon: on my side, i'm getting to know our threading + implementation better + closing to clean thread destruction + x15 ipc will hide reply ports ;p + memory leaks solved \o/ + now, have to fix memory release when joining + proper reference counting on detach/join/exit, let's see how it + goes .. + seems to work fine + + +## IRC, freenode, #hurd, 2013-09-18 + + ok i'll soon have gnumach and libc packages including proper + thread destruction :> + braunr: why did you have to touch gnumach? + to add a call allowing threads to release ports and memory + i.e. their last self reference, their reply port and their stack + let me public my current patches + braunr: thread_commit_suicide ? 
+ hehe + initially thread_terminate_self but + it can be used by other threads too + to i named it thread_terminate_release + http://darnassus.sceen.net/~rbraun/0001-pthread_thread_halt.patch + + http://darnassus.sceen.net/~rbraun/0001-thread_terminate_release.patch + the pthread patch needs to be polished because it changes the + semantics of pthread_thread_halt + but other than that, it should be complete + pthread_thread_halt_reallyhalt + ok let's try these libc packages + old static ext2fs for the root, but other than that, it boots + let's try iceweasel + (i'll need to build a hurd package against this new libc, removing + the libports_stability patch which prevents thread destruction in servers + on the way) + prevents thread destruction o_O + yes + in libports only ;p + oh, *only* in libports, I assumed for a moment that it affected + almost every component of the Hurd... + *phew( + ... :) + that's why, after a burst of messages, say because of aptitude + (select), you may see a few hundred threads still hanging around + also why unused servers remain running even after several minutes, + where the normal timeout is 2mins + I wondered about that, some servers (symlink comes to mind) seem + to go away if unused (or that's how I read the code) + symlinks are usually not servers, since most of them actually + exist in file systems, and are implemented through an optimization + yes I know that + trans/symlink.c reads: + /* The timeout here is 10 minutes */ + err = mach_msg_server_timeout (fsys_server, 0, control, + MACH_RCV_TIMEOUT, 1000 * 60 * 10); + if (err == MACH_RCV_TIMED_OUT) + exit (0); + ok + hm, /hurd/symlink doesn't feel at all like a symlink... but + works like one + well, starting iceweasel makes X on my host freeze oO + bbl + /hurd/symlink translators do go away after being unused for 10 + minutes... this is funny if they are set up by hand instead of being + started from a passive translator record + magically vanishing symlinks ;) + + +## IRC, freenode, #hurd, 2013-09-19 + + hum, i can't rebuild a hurd package :( + braunr: with your thread destruction patches in libc? + yes but it's unrelated + In file included from ../../libdiskfs/boot-start.c:38:0: + ./fsys_reply_U.h:173:15: error: conflicting types for + ‘fsys_get_children’ + i didn't see a new libc debian release + hm, David reported that as well + + id:CAEvUa7=QzOiS41G5Vq8k4AiaN10jAPm+CL_205OHJnL0xpJXbw@mail.gmail.com + uh oh + it seems I didn't add a _reply suffix to the reply routines :/ + there's quite a bit of fallout from my patches, I kinda feel bad + :( + teythoon: what i'm wondering is what youpi did too, since he got + hurd binary packages + braunr: well neither he nor I noticed that b/c for us the + declarations were just missing + from libc you mean ? + or hum gnumach-common ? + not sure actually + no it's not a gnumach thing + hurd-dev then + the build system should have cought these, or mig... + also, i see you changed fsys_reply.defs, but nothing about + fsys_request.defs + I have no fsys_requests.defs + looks like there was no fsys_request.defs in the first place + ... *sigh* + do you know an application that often creates and destroys threads + ? 
+ no, sorry + maybe some test suite + ah right + sysbench maybe + also, i've been hit by a lot more network deadlocks than usual + lately + fixing netdde has gained some priority in my todo list + + +## IRC, freenode, #hurd, 2013-09-20 + + oh, git is multithreaded + great + so i've actually tested my libpthread patch quite a lot diff --git a/open_issues/libpthread_dlopen.mdwn b/open_issues/libpthread_dlopen.mdwn index 05a07ef2..5d574261 100644 --- a/open_issues/libpthread_dlopen.mdwn +++ b/open_issues/libpthread_dlopen.mdwn @@ -1,4 +1,5 @@ -[[!meta copyright="Copyright © 2011, 2012 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2012, 2013 Free Software Foundation, +Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -112,7 +113,18 @@ IRC, freenode, #hurd, 2011-08-17 [[packaging_libpthread]] ---- + +# IRC, freenode, #hurd, 2013-09-03 + + iceweasel: ./pthread/../sysdeps/generic/pt-mutex-timedlock.c:70: + __pthread_mutex_timedlock_internal: Assertion `__pthread_threads' failed. + LD_PRELOAD libpthread + why + missing link to pthread? + and yes, it's known already, just nobody worked on solving it + + +# libthreads vs. libpthread The same symptom appears in an odd case, for instance: diff --git a/open_issues/llvm.mdwn b/open_issues/llvm.mdwn index 30b18edf..4da58579 100644 --- a/open_issues/llvm.mdwn +++ b/open_issues/llvm.mdwn @@ -101,6 +101,35 @@ a06fe9183fbffb78798a444da9bc3040fdd444aa (2013-03-23), test-suite A lot of Linux-specific things. + * IRC, OFTC, #debian-hurd, 2013-09-05: + + how can this fix it on {kf,hurd}-i386? + http://anonscm.debian.org/viewvc/pkg-llvm/llvm-toolchain/branches/3.3/debian/patches/libstdc%2B%2B-header-i386.diff?view=markup&sortby=date&pathrev=830 + what makes you think it does? + it fixes #714890, which has nothing to do with hurd or + kfreebsd + i simple wouldn't add a patch that fixes it on one i386 arch + only, being aware there are others + meet sylvestre + + * IRC, freenode, #hurd, 2013-09-05: + + tschwinge: iirc you were working on llvm/clang, weren't you? + pinotree: That's right. I have patches to + follow-up/rework. Stalled at the moment, as you probably already + guessed... %-) + tschwinge: by the way, pinotree if you have time + for hurd stuff, I would be glad to have your help to port + llvm-toolchain-3.3 to hurd. I am having some issues with threading + aspects https://paste.debian.net/35466/ + he's the debian packager of llvm + That paste is for LLDB -- which I'd not assume to be in a + shape usable for Hurd. + (I didn't look at it at all.) 
+ tschwinge: if you look at the latest llvm-toolchain-3.3 + debian source, there's a lldb-hurd.diff patch, which starts some + include header dance + # Build diff --git a/open_issues/mach_migrating_threads.mdwn b/open_issues/mach_migrating_threads.mdwn index c14ce95a..bbc6ac45 100644 --- a/open_issues/mach_migrating_threads.mdwn +++ b/open_issues/mach_migrating_threads.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -15,3 +15,89 @@ License|/fdl]]."]]"""]] * [[microkernel/mach/memory_object/discussion]] * [[resource_management_problems]] + + +# IRC, freenode, #hurd, 2013-08-13 + +In context of [[resource_management_problems]]. + + and thread migration itself is something very confusing + it's better to think of it as scheduling context inheritance + braunr: I read the paper I mentioned and then I wanted to find + the sources they modified + I failed + I hate scientific paper about software that fail to provide the + source code + that's not science imho b/c it's not reproducible + i have some osf source code here + i'll send it if you want + ah interesting + but really, when you dive into it, thread migration is merely + scheduling context inheritance with kernel upcalls + it's good + I searched for osf mach but google didn't turned up anything + but it has nothing to do with resource accounting + (well, it may hepl better account for cpu time actually) + help* + but that's all + why is that all? wouldn't that be transitive and could also be + used for i/o accounting? + also I tried to find alternative mach implementations + I wasn't terribly successful, and some sites are gone or + unmaintained for years :/ + we don't need that for io accountin + g + thread migration is a kernel property + on mach with userspace drivers, io isn't + mach should only control cpu and memory + and you though you can account physical memory, you can't transfer + virtual memory accounting from one task to another + yes, but once all of those resources can be accounted to the + thread initiating whatever it needs doing, shouldn't that be much easier? + teythoon: it's not required for that + teythoon: keep in mind userspace sees activations + in a thread migration enabled kernel, activations are what we + usually call threads, and threads are scheduling contexts + braunr: ok, so TM is not required for accounting, but surely + it's a good thing to have, no? + teythoon: it's required for cpu accounting only + which is very important :) + if you look carefully, you'll see hurd servers are what use most + cpu + there is now easy way to know which application actually uses the + server + i personally tend to think more and more that servers *should* + impersonate clients + TM (or rather, scheduling context inheritance) is one step + it's not enough exactly because it doesn't help with resource + accounting + teythoon: + ftp://ftp.mklinux.org/pub/mklinux-pre-R1/SRPMS/sources/osfmk.tar.gz + + +# IRC, freenode, #hurd, 2013-09-02 + +[[!taglink open_issue_documentation]]: move information to +[[microkernel/mach/history]]. + + braunr: btw, I just noticed lot's of #ifdef MIGRATING_THREADS in + gnumach, so there was some work being done in that direction? 
+ gnumach is a fork of mach4 + at a stage whre migration was being worked on, yes + from what I've gathered, gnumach is the only surviving mach4 + fork, right? + yes + well + the macos x version is probably one too + i don't know + oh? I read that it was based on mach3 + it is + i can't tell how much of mach3 versus mach4 it has, and if it's + relevant at all + and the osfmach, was that also based on mach4? + yes + ok, fair enough + that's why i think macos x is based on it too + i initially downloaded osfmach sources to see an example of how + thread migration was used from userspace + and they do have a special threading library for that diff --git a/open_issues/memory_object_model_vs_block-level_cache.mdwn b/open_issues/memory_object_model_vs_block-level_cache.mdwn index 7da5dea4..22db9b86 100644 --- a/open_issues/memory_object_model_vs_block-level_cache.mdwn +++ b/open_issues/memory_object_model_vs_block-level_cache.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -10,6 +10,8 @@ License|/fdl]]."]]"""]] [[!tag open_issue_documentation open_issue_hurd open_issue_gnumach]] +[[!toc]] + # IRC, freenode, #hurd, 2012-02-14 @@ -271,3 +273,242 @@ License|/fdl]]."]]"""]] slpz: When mo_data_return is called, once the memory manager no longer needs supplied data, it should be deallocated using vm_deallocate. So this way pagers acknowledges the end of flush. + + +# IRC, freenode, #hurd, 2013-08-26 + + < Spyro> Ok, so + < Spyro> idiot question: in a nutshell, what is a memory object? + < Spyro> and how is swapping/paging handled? + < braunr> Spyro: a memory object is how the virtual memory system views a + file + < braunr> so it's a sequence of bytes with a length + < braunr> "swapping" is just a special case of paging that applies to + anonymous objects + < braunr> (which are named so because they're not associated with a file + and have no name) + < Spyro> Who creates a memory object, and when? + < braunr> pagers create memory objects when needed, e.g. when you open a + file + < Spyro> and this applies both to mmap opens as well as regular I/O opens + as in for read() and write()? + < braunr> basically, all file systems capable of handling mmap requests + and/or caching in physical memory are pagers + < braunr> yes + < braunr> read/write will go through the page cache when possible + < Spyro> and who owns the page cache? + < Spyro> also, who decides what pages ot evict to swap/file if physical + memory gets tight? + < braunr> the kernel + < braunr> that's one of the things that make mach a hybrid + < Spyro> so the kernel owns the page cage? + < Spyro> ...fml + < Spyro> cache! + < braunr> yes + + +## IRC, freenode, #hurd, 2013-08-27 + + < Spyro> so braunr: So, who creates the memory object, and how does it get + populated? + < Spyro> and how does a process accessing a file get hooked up to the + memory object? 
+ < braunr> Spyro: i told you, pagers create memory objects + < braunr> memory objects are how the VM system views files, so they're + populated from the content of files + < braunr> either true files or virtual files such as in /proc + < braunr> Spyro: processes don't directly access memory objects unless + memory mapping them with vm_map() + < braunr> pagers (basically = file systems) do + ok, so how is a pager/fs involved in handling a fault? + + +## IRC, freenode, #hurd, 2013-08-28 + + Spyro: each object is linked to a pager + Spyro: when a fault occurs, the kernel looks up the VM map (kernel + or a user one), and the address in this map, then the map entry, checks + access and lots of other details + ok, so it's pager -> object -> vmem + ? + Spyro: then finds the object mapped at that address (similar to + how a file is mapped with mmap) + from the object, it finds the pager + ok + and asks the pager about the data at the appropriate offset + so how does a user process do normal file I/O? is faulting just a + special case of it? + it's completely separate + eww + normal I/O is done with message passing + the hurd io interface + ok + so who talks to who on a file I/O? + a client (e.g. cat) talks to a file system server (e.g. ext2fs) + ok so + it's client to the pager for regular file I/O? + Spyro: i don't understand the question + Spyro: it's client to server, the server might not be a pager + ok + just trying to figure out the difference between paging/faulting + and regular I/O + regular I/O is just message passing + page fault handling is dealt with by pagers + and I have a hunch that the fs/pager is involved somehow in both, + because the server is the source of the data + I'm getting a headache + nalaginrut: a server like ext2fs is both a file server and a pager + oh! + oh btw, does a file server make use of memory objects for caching? + Spyro: yes + or rather, can it? + does it have to? + memory objects are for caching, and thus for page faults + Spyro: for caching, it's a requirement + for I/O, it's not + you could have I/O without memory objects + ok + so how does the pager/fileserver use memory objects for caching? + does it just map and write to them? + basically yes but there is a complete protocol with the kernel for + that + + http://www.gnu.org/software/hurd/gnumach-doc/External-Memory-Management.html#External-Memory-Management + heh, lucky guess + ty + I am in way over my head here btw + zero experience with micro kernels in practice + it's not trivial + that's not a microkernel thing at all + that's how it works in monolithic kernels too + i recommend netbsd uvm thesis + there are nice pictures describing the vm system + derrr...preacious? + wow + just ignore the anonymous memory handling part which is specific + to uvm + @_@ + the rest is common to practically all VM systems out there + I know about the linux page cache + well it's almost the same + with memory objects being the same thing as files in a page cache? + memory objects are linux "address spaces" + and address spaces are how the linux mm views a file, yes + derp + ... + um... + struvt vm_page == struct page + * Spyro first must learn what an address_space is + struct vm_map == struct mm_struct + struct vm_map_entry == struct vm_area_struct + * Spyro isn't a linux kernel vm expert either + struct vm_object == struct address_space + roughly + details vary a lot + and what's an address_space ? 
+ 11:41 < braunr> and address spaces are how the linux mm views a + file, yes + ok + see include/linux/fs.h + struct address_space_operations is the pager interface + * Spyro should look at the linux kernel sources perhaps, unless you have an + easier reference + embarrassingly, RVR hired me as an editor for the linux-mm wiki + I should know this stuff + see + http://darnassus.sceen.net/~rbraun/design_and_implementation_of_the_uvm_virtual_memory_system.pdf + page 42 + page 66 for another nice view + i wouldn't recommend using linux source as refernece + it's very complicated, filled with a lot of code dealing with + details + lmao + and linux guys have a habit of choosing crappy names + I was only going to + stoppit + except for "linux" and "git" + ...make me laugh any more and I'll need rib surgery + laugh ? + complicated and crappy + seriously, "address space" for a file is very very confusing + oh I agree with that + yes, names are crappy + and the code is very complicated + it took me half an hour to find where readahead is done once + and i'm still not sure it was the right code + so in linkern, there is an address_space for each cached file? + takes me 30 seconds in netbsd .. + yes + eww + yeah, BAD name + but thanks for the explanation + now I finally know what an address space is + many linux core developers admit they don't care much about names + so, in hurd, a memory object is to hurd, what an address_space is + to linux? + yes + notto hurd + ok + to mach + you know what I mean + :P + easier than for linux I can tell you that much + and the bsd vm system is a stripped version of the mach vm + ok + that's why i think it's important to note it + good, I learned something abou tthe linux vm...from the mach guys + this is funny + linux did too + there is a paper about linux page eviction that directly borrows + the mach algorithm and improves it + mach is the historic motivation behind mmap on posix + oh nice! + but yes, linux picked a shitty name + is all that clearer to you ? + I think that address_space connection was a magic bolt of + understanding + and do you see how I/O and paging are mostly unrelated ? + almost + but how does a file I/O take advantage of caching by a memory + object? + does the file server just nudge the core for a hint? + the file system copies from the memory object + * Spyro noddles + I think I understand a bit better now + it's message passing + but I havfe too much to digest already + memory copying + if the memory is already there, good, if not, the kernel will ask + the file system to bring the data + if message passing uses zero copy, data retrieval can be deferred + until the client actually accesses it + which is a fancy way of saying demand paging? :P + it's always demand paging + what i mean is that the file system won't fetch data as soon as it + copies memory + but when this data is actually needed by the client + uh... + whta's a precious page? + let me check quickly + If precious is FALSE, the kernel treats the data as a temporary + and may throw it away if it hasn't been changed. If the precious value is + TRUE, the kernel treats its copy as a data repository and promises to + return it to the manager + basically, it's used when you want the kernel to keep cached data + in memory + the cache becomes a lossless container for such pages + the kernel may flush them, but not evict them + what's the difference? 
+ imagine a ramfs + point made + ok + would be pretty hard to flush something that doesn't have a backing + store + that was quick :) + well + the normal backing store for anonymous memory is the default pager + aka swap + eww + but if you want your data *either* in swap or in memory and never + in both + it may be useful diff --git a/open_issues/mig_portable_rpc_declarations.mdwn b/open_issues/mig_portable_rpc_declarations.mdwn index 084d7454..91838f60 100644 --- a/open_issues/mig_portable_rpc_declarations.mdwn +++ b/open_issues/mig_portable_rpc_declarations.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -56,3 +56,114 @@ License|/fdl]]."]]"""]] braunr: we discussed the problem of expressing structs with MIG in the libburn thread (which I still need to follow up on... [sigh]) + + +# IRC, freenode, #hurd, 2013-06-25 + + is there a nice way to get structured data through mig that I + haven't found yet? + say an array of string triples + no + :/ + but you shouldn't need that + my use case is getting info about fs translators from init to + procfs + +[[community/gsoc/project_ideas/mtab]], +[[community/gsoc/project_ideas/mtab/discussion]]. + + should I go for an iterator like interface instead? + depends + how many do you need ? + you could go for a variable sized array too + have a look at what already exists + records, maybe 10-15, depends on many fs translators are running + a variable sized array is ok if the size isn't too big (and when i + say too big, i mean hundreds of MiB) + an iterator is ok too if there aren't too many items + you may want to combine both (i think that's what proc does) + be aware that the maximum size of a message is limited to 512 MiB + yeah I saw the array[] of stuff stuff, but array[] of string_t + does not work, I guess b/c string_t is also an array + how would I send an array of variable length strings? + i'm not sure you can + or maybe out of line + somehow I expected mig to serialize arbitrary data structures, + maybe it's to old for that? + yeah, I read about uot of line, but that seems overkill + it is old yes + and not very user friendly in the end + let me check + we could stuff json into mig... + see proc_getallpids for example + we could get rid of low level serialization altogether :p + hah, exactly what I was looking at + (which is what i'll do in x15) + type pidarray_t = array[] of pid_t; + but that is trivial b/c its array[] of pid_t + and always have the server writing guide near you + yes + well, make one big string and an array of lengths :p + thought about that and said to myself, there must be a better + way that I haven't found yet + or one big string filled with real null-terminated c strings that + you keep parsing until you ate all input bytes + i'm almost certain there isn't + type string_t = c_string[1024]; /* XXX */ + yes + even that isn't really variable sized + you think anyone would object to me putting a json encoder in + /hurd/init? it is probably better than me at serializing stuff... + try with mig anyway + the less dependencies we have for core stuff, the simpler it is + but i agree, mig is painful + would it be too hacky if I abused the argz functions? 
they do + exactly what I'd need + + +## IRC, freenode, #hurd, 2013-06-26 + + there is https://code.google.com/p/protobuf-c/ and it has a rpc + mechanism and I believe one could plug arbitrary transports easily + please don't think about it + we really don't want to add another layer of serialization + it's better to completely redesign mach ipc anyway + and there is a project for that :p + ive seen x15 + just food for thought + i've studied google protocol buffers + and fyi, no, it wouldn't be easy to plug arbitrary transports on + top of mach + there is a lot of knowledge about mach ports in mig + +[[community/gsoc/project_ideas/mtab]], +[[community/gsoc/project_ideas/mtab/discussion]]. + + but again I face the challenge of serializing a arbitrary sized + list of arbitrary sized strings + yes + list of ports is easier ;) but I think its worthwile + so what about abusing argz* for this? you think it's too bad a + hack? + no since it's in glibc + awesome :) + but i don't remember the details well and i'm not sure the way you + use it is safe + yeah, I might have got the details wrong, I hadn't had the + chance to test it ;) + + about this dynamic size problem + a "simple" varying size array should do + you can easily put all your strings in there + seperated by 0? + yes + that's exactly what the argz stuff does + you'll get the size of the array anyway, and consume it until + there is no byte left + good + but be careful with this too + since translators can be run by users, they somtimes can't be + trusted + and even a translator running as root may behave badly + so careful with parsing + noted diff --git a/open_issues/mig_stub_functions.mdwn b/open_issues/mig_stub_functions.mdwn new file mode 100644 index 00000000..24a582b1 --- /dev/null +++ b/open_issues/mig_stub_functions.mdwn @@ -0,0 +1,41 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_mig]] + +[[!toc]] + + +# RPC Stubs Implemented by Hand + +## IRC, freenode, #hurd, 2013-07-28 + + why is libfshelp/start-translator-long.c doing the fsys_startup + rpcs by hand instead of using the mig generated stubs? + + +## IRC, freenode, #hurd, 2013-07-29 + + btw, anyone knows why libfshelp/start-translator-long.c + implements the fsys_startup rpc by hand? + teythoon: no idea + maybe b/c of the need to specify a timeout? can one do that with + the mig stubs? + yes + select used to be implemented that way + + +# Generate the Request and Reply Routines from the Synchronous Routines + +## IRC, freenode, #hurd, 2013-09-19 + + btw, is there any reason why mig couldn't generate the request + and reply routines from the synchronous routines? 
+ i guess it could diff --git a/open_issues/nptl.mdwn b/open_issues/nptl.mdwn index 9ff5fb51..3c84bfb0 100644 --- a/open_issues/nptl.mdwn +++ b/open_issues/nptl.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -10,7 +10,10 @@ License|/fdl]]."]]"""]] [[!tag open_issue_libpthread open_issue_glibc]] -IRC, #hurd, 2010-07-31 +[[!toc]] + + +# IRC, freenode, #hurd, 2010-07-31 Other question: how difficult is a NPTL port? Futexes and some kernel interfaces for scheduling stuff etc. -- what else? actually NPTL doesn't _require_ futexes @@ -26,6 +29,21 @@ IRC, #hurd, 2010-07-31 ... and even less so the interfavce that actual applications are using. We'd need to evaluate which benefits NPTL would bring. + +# IRC, freenode, #hurd, 2013-08-05 + + Hi, looks like kfreebsd are now using an NPTL-based pthread + library: FBTL, http://lists.debian.org/debian-bsd/2013/07/msg00060.html + Anything of interest for porting to Hurd? See also + http://lists.debian.org/debian-hurd/2013/08/msg00000.html + Petr could've been more verbose in his announcements + and there's + http://www.gnu.org/software/hurd/open_issues/nptl.html in our wiki + well, it seems to work fine for kFreeBSD: + http://lists.debian.org/debian-bsd/2013/07/msg00134.html + and http://lists.debian.org/debian-bsd/2013/07/msg00138.html + + --- # Resources diff --git a/open_issues/pthread_atfork.mdwn b/open_issues/pthread_atfork.mdwn index ac724cf0..1b656f05 100644 --- a/open_issues/pthread_atfork.mdwn +++ b/open_issues/pthread_atfork.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -8,6 +8,13 @@ Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license is included in the section entitled [[GNU Free Documentation License|/fdl]]."]]"""]] -[[!tag open_issue_libpthread]] +[[!tag open_issue_glibc open_issue_libpthread]] -pthread_atfork is not actually implemented, making some programs fail. Code can probably be borrowed from nptl/sysdeps/unix/sysv/linux/register-atfork.c +`pthread_atfork` is not actually implemented, making some programs fail. Code +can probably be borrowed from `nptl/sysdeps/unix/sysv/linux/register-atfork.c`. + + +# IRC, OFTC, #debian-hurd, 2013-08-21 + + SRCDIR/opal/mca/memory/linux/arena.c:387: warning: warning: + pthread_atfork is not implemented and will always fail diff --git a/open_issues/resource_management_problems.mdwn b/open_issues/resource_management_problems.mdwn index 8f752d61..daf97954 100644 --- a/open_issues/resource_management_problems.mdwn +++ b/open_issues/resource_management_problems.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2008, 2009, 2010 Free Software Foundation, +[[!meta copyright="Copyright © 2008, 2009, 2010, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable @@ -61,7 +61,8 @@ This is, of course, non-trivial to implement, and also requires changing the SPLICE_F_GIFT flag](http://www.kernel.org/doc/man-pages/online/pages/man2/vmsplice.2.html#DESCRIPTION).) 
-IRC, freenode, #hurd, 2011-07-31 + +## IRC, freenode, #hurd, 2011-07-31 < braunr> one of the biggest problems on the hurd is that, when a client makes a call, kernel (and other) resources are allocated on behalf of the @@ -75,6 +76,20 @@ IRC, freenode, #hurd, 2011-07-31 attempts) +## IRC, freenode, #hurd, 2013-08-13 + +In context of . + + teythoon: actually, thread migration isn't required for resource + accounting + +[[Mach_migrating_threads]]. + + braunr: but it solves it for free, doesn't it? + teythoon: no + it's really more complicated than that + + # Further Examples * [[hurd/critique]] @@ -83,4 +98,34 @@ IRC, freenode, #hurd, 2011-07-31 * [[translators_set_up_by_untrusted_users]], and [[pagers]] - * [[configure max command line length]] + * [[configure_max_command_line_length]] + + +## [[hurd/translator/exec]] server + +### IRC, freenode, #hurd, 2013-08-05 + + unzipping stuff in the exec server enables a dos on filesystem + translators + https://teythoon.cryptobitch.de/gsoc/heap/hello-1g.bz2 is + /hurd/hello padded with a gig of zeros, compressed with bzip2 + if set as an passive translator, it stalls other requests to the + filesystem, at least it does if ext2fs is used + teythoon: ? + teythoon: what's the dos here ? + I can prevent you from doing anything with the root filesystem + I'm kind of surprised myself, maybe a lock is held during the + exec of the translator? + the filesystem the hello-1g.bz2 translator is bound to is + affected + teythoon: i don't understand + have you tried starting something from another file system ? + the lock may simply be in the exec server itself + no, starting other things works fine + but on the other hand, a find / is stalled + :/ + *sigh* + don't worry + there is a solution :p + :) + and it only requires deleting code diff --git a/open_issues/robustness.mdwn b/open_issues/robustness.mdwn index 1f8aa0c6..a6b0dbfb 100644 --- a/open_issues/robustness.mdwn +++ b/open_issues/robustness.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -10,6 +10,7 @@ License|/fdl]]."]]"""]] [[!tag open_issue_documentation open_issue_hurd]] +[[!toc]] # IRC, freenode, #hurd, 2011-11-18 @@ -32,7 +33,9 @@ License|/fdl]]."]]"""]] ah yeah I thought so :) -# IRC, freenode, #hurd, 2011-11-19 +# Reincarnation Server + +## IRC, freenode, #hurd, 2011-11-19 will hurd ever have the equivalent of a rs server?, is that even possible with hurd? @@ -127,3 +130,40 @@ License|/fdl]]."]]"""]] neat, thanks actually it's not that old at all around 2007 + + +## IRC, freenode, #hurd, 2013-08-26 + + < teythoon> I came across some paper about process reincarnation and + created a little prototype a while back: + < teythoon> http://darnassus.sceen.net/gitweb/teythoon/reincarnation.git/ + < teythoon> and I looked into restarting the exec server in case it + dies. the exec server is an easy target since it has no state of its own + < teythoon> the only problem is that there is no exec server around to + start a new one + < youpi> teythoon: there could be another exec server only used to + (re)start the exec server + < youpi> that other exec server could even be restarted by the normal exec + server + < pinotree> what about a watchdog server? 
+ < teythoon> youpi: yes, I had the same idea, i actually patched /hurd/init + to do that, it's just not yet working + < pinotree> make it watch other servers (exec included), and make exec + watch the watchdog only + < teythoon> pinotree: look at my prototype, there is a watchdog server + < braunr> teythoon: what's the point of reincarnation without persistence ? + < teythoon> braunr: there is no point in reincarnation w/o persistence of + course + < teythoon> my prototype does a limited form of persistence + < teythoon> the point was to see whether I can mitm a translator and + restart it on demand and to gain more insight into the whole translator + mechanism + < braunr> teythoon: ok + < teythoon> braunr: see the readme, it retains state across reincarnations + < braunr> teythoon: how ? + < teythoon> braunr: the server can store a checkpoint using the + reincarnation_checkpoint procedure + < teythoon> + http://darnassus.sceen.net/gitweb/teythoon/reincarnation.git/blame/HEAD:/reincarnation.defshttp://darnassus.sceen.net/gitweb/teythoon/reincarnation.git/blame/HEAD:/reincarnation.defs + < teythoon> uh >,< sorry, pasted twice + < braunr> oh ok diff --git a/open_issues/secure_file_descriptor_handling.mdwn b/open_issues/secure_file_descriptor_handling.mdwn index 45e983a7..f4d1d396 100644 --- a/open_issues/secure_file_descriptor_handling.mdwn +++ b/open_issues/secure_file_descriptor_handling.mdwn @@ -1,4 +1,5 @@ -[[!meta copyright="Copyright © 2010, 2011 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2011, 2013 Free Software Foundation, +Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -16,7 +17,7 @@ on this, posted patches to [[mailing_lists/libc-alpha]]. This works needs to be resumed and finished. ---- +Add tests from Linux kernel commit messages for `t/dup3` et al. In an interesting point is made: *you [may] want some [[unix/file_descriptor]] to still be open if 'exec' fails, but you diff --git a/open_issues/systemd.mdwn b/open_issues/systemd.mdwn index c23f887f..d00b3d8a 100644 --- a/open_issues/systemd.mdwn +++ b/open_issues/systemd.mdwn @@ -102,6 +102,939 @@ Likely there's also some other porting needed. just assume you can't use systemd on anything else than linux +## IRC, OFTC, #debian-hurd, 2013-08-12 + + huh, Lennert Poettering just mentioned the Hurd in his systmd talk + well, in the context of you IPC in Unix sucks and kdbus + s/you/how/ + QED + what did you expect? :) + I didn't quite get it, but he seemed to imply the Hurd was a step + in the right direction over Unix + (which is obvious, but it wasn't obvious he had that opinion) + + +## IRC, OFTC, #debian-hurd, 2013-08-13 + + so cgroups seems to be most prominent thing the systemd people + think the Hurd lacks + azeem: In 2010, I came to the same conclusion, + . ;-) + heh + I don't think of any show-stopper for implementing that -- just + someone to do it. + azeem: which part of cgroups, like being able to kill a cgroup? + it shouldn't be very hard to implement what systemd needs + probably also the resource allocation etc. 
+ the questions are I guess (i) do the cgroups semantics make sense + from our POV and/or do we accept that cgroups is the "standard" now and + (ii) should systemd require concrete implementations or just the concept + in a more abstract sense + being the first non Linux OS that runs systemd would be a nice + showcase of Hurds flexibility + maybe upstart is less trouble + azeem: possibly + teythoon: can you just include upstart in your GSOC? kthxbye + at least libnih (the library with base utilities and such used + by upstart) required a working file monitor (and the current + implementation kind of exposes a fd) and certain semantics for waitid + libnih/upstart have "just" the issue of being under CLA... + pinotree: yeah, true + I suggested "startup" as a name for a fork + imho there would be no strict need to fork + azeem: but upstart is a lot less interesting. last time I used + it it wasn't even possible to disable services in a clean way + pinotree: is that still so now that Scott works for google? + pochu: yeah, since it's a Canonical CLA, not rally something + tied to a person + (iirc) + sure, but scott is the maintainer... + shrug + nah, scott left upstart + AFAIK + at least James Hunt gave a talk earlier with Steve Langasek and + introduced himself as the upstart maintainer + also I heard in the hallway track that the upstart people are + somewhat interested in BSD/Hurd support as they see it as a selling point + against systemd + pochu: it's just like FSF CLA for GNU projects: even if the + maintainers/contributors change altogether, copyright assignment is still + FSF + but their accents were kinda annoying/hard to follow so I didn't + follow their talk closesly to see whether they brought it up + pinotree: well, it's not + azeem: looking at https://code.launchpad.net/libnih, I'm not sure + libnih has a maintainer anymore... + pinotree: first off, you're not signing over the copyright with + their CLA, just giving them the right to relicense + pinotree: but more importantaly, the FSF announced in a legally + binding way that they will not take things non-free + anyway, I'll talk to the upstart guys about libnih + + +## IRC, OFTC, #debian-hurd, 2013-08-15 + + btw, I talked to vorlon about upstart and the Hurd + so the situation with libnih is that it is basically + feature-complete, but still maintained by Scott + upstart is leveraging it heavily + and Scott was (back in the days) against patches for porting + for upstart proper, Steve said he would happily take porting + patches + + +## IRC, freenode, #hurd, 2013-08-26 + + < youpi> teythoon: I tend to agree with mbanck + < youpi> although another thing worth considering would be adding something + similar to control groups + < youpi> AIUI, it's one of the features that systemd really requires + < braunr> uhg, cgroups already + < braunr> youpi: where is that discussion ? + < youpi> it was a private mail + < braunr> oh ok + < teythoon> right, so about upstart + < teythoon> to be blunt, I do not like upstart, though my experience with + it is limited and outdated + < braunr> that was quick :) + < braunr> i assume this follows your private discussion with youpi and + mbank ? 
+ < teythoon> I used it on a like three years old ubuntu and back then it + couldn't do stufft hat even sysvinit could do + < teythoon> there was not much discussion, mbank suggested that I could + work on upstart + < teythoon> b/c it might be easier to support than systemd + < teythoon> which might be very well true, then again what's the benefit of + having upstart? I'm really curious, I should perhaps read up on its + features + < pinotree> event-based, etc + < youpi> it is also about avoiding being pushed out just because we don't + support it? + < teythoon> yes, but otoh systemd can do amazing things, the featurelist of + upstart reads rather mondane in comparison + < youpi> I don't really have an opinion over either, apart from portability + of the code + < braunr> teythoon: the system requirements for systemd would take much + time to implement in comparison to what we already have + < braunr> i still have maksym's work on last year gsoc on my list + < braunr> waiting to push in the various libpager related patches first + < teythoon> so you guys think it's worthwile to port upstart? + < braunr> no idea + < braunr> teythoon: on another subject + < azeem_> teythoon: I like systemd more, but the hallway track at Debconf + seemed to imply most people like Upstart better except for the CLA + < azeem_> which I totally forgot to address + < youpi> CLA ? + < azeem_> contributor license agreement + < braunr> since you've now done very good progress, is your work available + in the form of ready-to-test debian packages ? + < teythoon> braunr: it is + < teythoon> braunr: http://teythoon.cryptobitch.de/gsoc/heap/debian/ + < braunr> i remember urls in some of your mails + < braunr> ah thanks + < braunr> "cryptobitch" hum :) + < azeem_> in any case, everbody assumed either Upstart or Systemd are way + ahead of systemvinit + < braunr> sysvinit is really ancient :) + < azeem_> apart from the non-event-driven fundamental issue, a lot of + people critized that the failure rate at writing correct init-scripts + appears to be too high + < azeem_> one of the questions brought up was whether it makes sense to + continue to ship/support systemvinit once a switch is made to + systemd/upstart for the Linux archs + < azeem_> systemvinit scripts might bitrot + < azeem_> but anyway, I don't see a switch happen anytime soon + < teythoon> well, did upstart gain the capability of disabling a service + yet? + < azeem_> teythoon: no idea, but apparently: + http://askubuntu.com/questions/19320/recommended-way-to-enable-disable-services/20347#20347 + < teythoon> azeem_: then there is hope yet ;) + < azeem_> the main selling point of Upstart is that it shipped in several + LTS releases and is proven technology (and honestly, I don't read a lot + of complaints online about it) + < azeem_> (I don't agree that SystemD is unproven, but that is what the + Upstart guys implied) + < teythoon> am I the only one that thinks that upstart is rather + unimpressive? 
+ * azeem_ doesn't have an opinion on it + < azeem> teythoon: + http://penta.debconf.org/dc13_schedule/events/1027.en.html has slides and + the video + < azeem> teythoon: eh, appears the link to the slides is broken, but they + are here: + http://people.canonical.com/~jhunt/presentations/debconf13/upstart-debconf-2013.pdf + < braunr> teythoon: actually, from the presentation, i'd tend to like + upstart + < braunr> dependency, parallelism and even runlevel compatibility flows + naturally from the event based model + < braunr> sysv compatibility is a great feature + < braunr> it does look simple + < braunr> i admit it's "unimpressive" but do we want an overkill init + system ? + < braunr> teythoon: what makes you not like it ? + < azeem> Lennart critized that upstart doesn't generate events, just + listens to them + < azeem> (which is a feature, not a bug to some) + < braunr> azeem: ah yes, that could be a lack + < azeem> braunr: http://penta.debconf.org/dc13_schedule/events/983.en.html + was the corresponding SystemD talk by Lennart, though he hasn't posted + slides yet I think + < teythoon> braunr: well, last time I used it it was impossible to cleanly + disable a service + < teythoon> also ubuntu makes such big claims about software they develop, + and when you read up on them it turns out that most of the advertised + functionality will be implemented in the near future + < teythoon> then they ship software as early as possible only to say later + that is has proven itself for so many years + < teythoon> and tbh I hate to be the one that helped port upstart to hurd + (and maybe kfreebsd as a byproduct) and later debian choses upstart over + systemd b/c it is available for all debian kernels + < kilobug> teythoon: ubuntu has a tendency to ship software too early when + it's not fully mature/stable, but that doesn't say anything about the + software itself + < pinotree> teythoon: note the same is sometimes done on fedora for young + technologies (eg systemd) + < azeem> teythoon: heh, fair enough + < p2-mate> braunr: I would prefer if my init doesn't use ptrace :P + < teythoon> p2-mate: does upstart use ptrace? + < p2-mate> teythoon: yes + < teythoon> well, then I guess there won't be an upstart for Hurd for some + time, no? + < kilobug> p2-mate: why does it use ptrace for ? + < p2-mate> kilobug: to find out if a daemon forked + < kilobug> hum I see + < azeem> p2-mate: the question is whether there's a Hurdish way to + accomplish the same + < p2-mate> + http://bazaar.launchpad.net/~upstart-devel/upstart/trunk/view/head:/init/job_process.c + < p2-mate> see job_process_trace_new :) + < kilobug> azeem: it doesn't seem too complicated to me to have a way to + get proc notify upstart of forks + < p2-mate> azeem: that's a good question. there is a linuxish way to do + that using cgroups + < azeem> right, there's a blueprint suggesting cgroups for Upstart here: + https://blueprints.launchpad.net/ubuntu/+spec/foundations-q-upstart-overcome-ptrace-limitations + < teythoon> yes, someone should create a init system that uses cgroups for + tracking child processes >,< + < teythoon> kilobug: not sure it is that easy. who enforces that proc_child + is used for a new process? isn't it possible to just create a new mach + task that has no ties to the parent process? + < teythoon> azeem: what do you mean by "upstart does not generate events"? + there are "emits X" lines in upstart service descrpitions, surely that + generates event X? 
+ < azeem> I think the critique is that this (and those upstart-foo-bridges) + are bolted on, while SystemD just takes over your systems and "knows" + about them first-hand + < azeem> but as I said, I'm not the expert on this + < teythoon> uh, in order to install upstart one has to remove sysvinit + ("yes i am sure...") and it fails to bring up the network on booting the + machine + < teythoon> also, both systemd and upstart depend on dbus, so no cookie for + us unless that is fixed first, right? + < pinotree> true + < teythoon> well, what do you want me to do for the next four weeks? + < youpi> ideally you could make both upstart and systemd work on hurd-i"86 + < pinotree> both in 4 weeks? + < youpi> so hurd-i386 doesn't become the nasty guy that makes people tend + for one or the other + < youpi> I said "ideally" + < youpi> I don't really have any idea how much work is required by either + of the two + < youpi> I'd tend to think the important thing to implement is something + similar to control groups, so both upstart (which is supposed to use them + someday) and systemd can be happy about it + < teythoon> looks like upstarts functionality depending on ptrace is not + required, but can be enabled on a per service base + < teythoon> so a upstart port that just lacks this might be possible + < teythoon> youpi: the main feature of cgroups is that a process cannot + escape its group, no? i'm not sure how this could be implemented atop of + mach in a secure and robust way + < teythoon> b/c any process can just create mach tasks + < youpi> maybe we need to add a feature in mach itself, yes + < teythoon> ok, implementing cgroups sounds fun, I could do that + < youpi> azeem: are you ok with that direction? + < azeem> well, in general yes; however, AIUI, cgroups is being redesigned + upstream, no? + < youpi> that's why I said "something like cgroups" + < azeem> ah, ok + < youpi> we can do something simple enough to avoid design quesetions, and + that would still be enough for upstart & systemd + < azeem> + (http://www.linux.com/news/featured-blogs/200-libby-clark/733595-all-about-the-linux-kernel-cgroups-redesign) + btw + < braunr> p2-mate: upstart uses ptrace ? + < p2-mate> yes + < youpi> teythoon: and making a real survey of what needs to be fixed for + upstart & systemd + < p2-mate> see my link posted earlier + < braunr> ah already answered + < braunr> grmbl + < braunr> it's a simple alternative to cgroups though + < braunr> teythoon: dbus isn't a proble + < braunr> problem + < braunr> it's not that hard to fix + < youpi> well, it hasn't been fixed for a long time now :) + < braunr> we're being slow, that's all + < braunr> and interested by other things + < gg0> 12:58 < teythoon> btw, who is this heroxbd fellow and why has he + suddenly taken interest in so many debian gsoc projects? + < gg0> http://lists.debian.org/debian-hurd/2013/05/msg00133.html + < gg0> i notice nobody mentioned openrc + < pinotree> he's the debian student working on integrating openrc + < gg0> pinotree: no, the student is Bill Wang, Benda as he says is a + co-mentor + https://wiki.debian.org/SummerOfCode2013/Projects#OpenRC_init_system_in_Debian + < pinotree> whatever, it's still the openrc gsoc + < azeem> well, they wanted to look at it WRT the Hurd, did they follow-up + on this? + < gg0> btw wouldn't having openrc on hurd be interesting too? 
+ < pinotree> imho not really + < gg0> no idea whether Bill is also trying to figure out what to do, + probably not + < azeem> somebody could ping that thread you mentioned above to see whether + they looked at the Hurd and/or need help/advice + < gg0> azeem: yeah somebody who could provide such help/advice. like.. you? + for instance + * gg0 can just paste urls + < azeem> they should just follow-up on-list + + +## IRC, freenode, #hurd, 2013-08-28 + + anyone knows a user of cgroups that is not systemd? so far I + found libcg, that looks like a promising first target to port first, + though not surprisingly it is also somewhat linux specific + teythoon: OpenRC optionally uses cgroups IIRC. + Not mandatory because unlike systemd it actually tries (at all) + to be portable. + + +## IRC, freenode, #hurd, 2013-09-02 + + braunr: I plan to patch gnumach so that the mach tasks keep a + reference to the task that created them and to make that information + available + braunr: is such a change acceptable? + teythoon: what for ? + braunr: well, the parent relation is currently only implemented + in the Hurd, but w/o this information tracked by the kernel I don't see + how I can prevent malicious/misbehaving applications to break out of + cgroups + also I think this will enable us to fix the issue with tracking + which tasks belong to which subhurd in the long term + ah cgroups + yes cgroups should partly be implemented in the kernel ... + teythoon: that doesn't surprise me + i mean, i think it's ok + the kernel should implement tasks and threads as closely as the + hurd (or a unix-like system) needs it + braunr: ok, cool + braunr: I made some rather small and straight forward changes to + gnumach, but it isn't doing what I thought it would do :/ + braunr: http://paste.debian.net/33717/ + you added a field to task_basic_info + thereby breaking the ABI + braunr: my small test program says: my task port is 1(pid 13) + created by task -527895648; my parent task is 31(pid 1) + braunr: no, it is not. I appended a field and these structures + are designed to be extendable + hm + ok + although i'm not so sure + there are macros defining the info size, depending on what you ask + you may as well get garbage + have you checked that ? + i initialized my struct to zero before calling mach + teythoon: can you put some hardcoded value, just to make sure data + is correctly exported ? + braunr: right, good idea + braunr: my task port is 1(pid 13) created by task 3; my parent + task is 31(pid 1) -- so yes, hardcoding 3 works + ok + braunr: also I gathered evidence that the convert_task_to_port + thing works, b/c first I did not have the task_reference call just before + that so the reference count was lowered (convert... consumes a reference) + and the parent task was destroyed + braunr: I must admit I'm a little lost. I tried to return a + reference to task rather than task->parent_task, but that didn't work + either + braunr: I feel like I'm missing something here + maybe I should get aquainted with the kernel debugger + err, the kernel debugger is not accepting any symbol names, even + though the binary is not stripped o_O + err, neither the kdb nor gdb attached to qemu translates + addresses to symbols, gdb at least translates symbols to addresses when + setting break points + how did anyone ever debug a kernel problem under these + conditions? + teythoon: i'll have a look at it when i have some time + + +## IRC, freenode, #hurd, 2013-09-03 + + :/ I believe the startup_notify interface is ill designed... 
an + translator can defer the system shutdown indefinitely + it can + that's bad + yes + the hurd has a general tendency to trust its "no mutual trust + required" principle + to rely on it a bit too much + well, at least it's a privileged operation to request this kind + of notification, no? + why ? + teythoon: it normally is used mostly by privileged servers + but i don't think there is any check on the recipient + braunr: b/c getting the port to /hurd/init is done via + proc_getmsgport + teythoon: ? + braunr: well, in order to get the notifications one needs the + msgport of /hurd/init and getting that requires root privileges + teythoon: oh ok then + teythoon: what's bad with it then ? + braunr: even if those translators are somewhat trusted, they can + (and do) contain bugs and stall the shutdown + I think this even happened to me once, I think it was the pfinet + translator + teythoon: how do you want it to behave instead ? + braunr: well, /hurd/init notifies the processes sequentially, + that seems suboptimal, better to send async notifications to all of them + and then to collect all the answers + braunr: if one fails to answer within a rather large time frame + (say 5 minutes) shutdown anyway + i agree with async notifications but + i don't agree with the timeout + for reference, a (voluntary) timeout of 1 minute is hardcoded in + /hurd/init + the timeout should be a parameter + it's common on large machines to have looong shutdown delays + of the notification? + the answer means "ok i'm done you can shutdown" + well this can take long + most often, administrators simply prefer to trust their program is + ok and won't take longer than it needs to, even if it's long + and not answering at all causes the shutdown / reboot to fail + making the system hang + i know + in a state where it is not easily reached if you do not have + access to it + but since it only concerns essential servers, it should befine + essential servers are expected to behave well + it concerns servers that have requested a shutdown notification + ok so no essential but system servers + essential servers are only exec, proc, / + yes + the same applies + init and auth too, no? + yes + you expect root not to hang himself + I do expect all software to contain bugs + yes but you also expect them to provide a minimum level of + reliability + otherwise you can just throw it all away + no, not really + well + I know, that's my dilemma basically ;) + if you don't trust your file system, you make frequent backups + if you don't trust your shutdown code, you're ready to pull the + plug manually + (or set a watchdog or whatever) + what i mean is + we should NEVER interfere with a program that is actually doing + its job just because it seems too long + timeouts are almost never the best solution + they're used only when necessary + e.g. 
across networks + it's much much much worse to interrupt a proper shutdown process + because it "seems too long" than just trust it behaves well 99999%%%% of + the time + in particular because this case deals with proper data flushing, + which is an extremely important use case + it's hard/theoretically impossible to distinguish between taking + long and doing nothing + it's impossible + agreed + => trust + if you don't trust, you run real time stuff + and you don't flush data on disk + ^^ + (which makes a lot of computer uses impossible as well) + there are only 2 people I trust, and the other one is not + /hurd/pfinet + if this shutdown procedure is confined to the TCB, it's fine to + trust it goes well + tcb? + trusted computing base + http://en.wikipedia.org/wiki/Trusted_computing_base + * teythoon shudders + "trust" is used way to much these days + and I do not like the linux 2.0 ip stack to be part of our TCB + basically, on a multiserver system like the hurd, the tcb is every + server on the path to getting a service done from a client + then make it not request to be notified + or make two classes of notifications + because unprivileged file systems should be notified too + indeed + by the way, we should have a hurdish libnotify or something for + this kind of notifications + but in any case, it should really be policy + we should ... :) + ^^ + + +## IRC, freenode, #hurd, 2013-09-04 + + braunr: btw, I now believe that no server that requested + shutdown notifications can stall the shutdown for more than 1 minute + *unless* its message queue is full + so any fs should better sync within that timeframe + where is this 1 min defined ? + init/init.c search for 60000 + ew + did I just find the fs corruption bug everyone was looking for? + no + what corruption bug ? + not sure, I thought there was still some issues left with + unclean filesystems every now and then + *causing + yes but we know the reasons + ah + involving some of the funniest names i've seen in computer + terminology : + writeback causing "message floods", which in turn create "thread + storms" in the servers receiving them + ^^ it's usually the other way around, storms causing floods >,, + teythoon: :) + let's say it's a bottom-up approach + then the fix is easy, compile mach with -DMIGRATING_THREADS :) + teythoon: what ? + well, that would solve the flood/storm issue, no? + no + the real solution is proper throttling + which can stem from synchronous rpc (which is the real property we + want from migrating threads) + but the mach writeback interface is async + :p + + +## IRC, freenode, #hurd, 2013-09-05 + + teythoon: oh right, forgot about your port issue + don't worry, I figured by now that this must be a pointer + and I'm probably missing some magic that transforms this into a + name for the receiver + (though I "found" this function by looking at the mig + transformation for ports) + i was wondering why you called the convert function manually + instead of simply returning the task + and let mig do the magic + b/c then I would have to add another ipc call, no? 
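
The "magic" mentioned just above is MIG's type translation for `task_t`:
when a routine is declared with a `task_t` out parameter, the generated
kernel-side stub runs the out-translation function (`convert_task_to_port`
in gnumach) and handles the reference, so nothing needs to be converted by
hand.  A minimal sketch of what such an added RPC could look like; the
`task_get_creator` name and the subsystem number are purely hypothetical,
not an existing gnumach interface:

    /* task_creator.defs, illustrative sketch only.  */
    subsystem task_creator 4400;

    #include <mach/std_types.defs>
    #include <mach/mach_types.defs>

    /* Return a send right to the task that created TASK.  Because the
       out parameter is typed task_t, MIG applies the out-translation
       and reference handling automatically.  */
    routine task_get_creator(
                task    : task_t;
        out     creator : task_t);
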
+ let me see the basic info call again + my problem with this code is that it doesn't take into account the + ipc space of the current task + which means you probably really return the ipc port + the internal kernel address of the struct + indeed, ipc_port_t convert_task_to_port(task) + i'd personally make a new rpc instead of adding it to basic info + basic info doesn't create rights + what you want to achieve does + you may want to make it a special port + i.e. a port created at task creation time + y? + it also means you need to handle task destruction and reparent + yes, I thought about that + see + http://www.gnu.org/software/hurd/gnumach-doc/Task-Special-Ports.html#Task-Special-Ports + for now you may simply turn the right into a dead name when the + parent dies + although adding a call and letting mig do it is simpler + mig handles reference counting, users just need to task_deallocate + once done + o_O mig does reference counting of port rights? + mig/mach_msg + is there anything it *doesn't* do? + i told you, it's a very complicated messaging interface + coffee ? + fast ? + ^^ + mig knows about copy_send/move_send/etc... + so even if it doesn't do reference counting explicitely, it does + take care of that + true + in addition, the magic conversions are intended to both translate + names into actual structs, and add a temporary reference at the same time + teythoon: everything clear now ? :) + braunr: no, especially not why you suggested to create a special + port. but this will have to wait for tomorrow ;) + + +## IRC, OFTC, #debian-hurd, 2013-09-06 + + teythoon: hi there + so I've been following your blog entries about cgroups on + hurd... very impressive :) + but I think there's a misunderstanding about upstart and + cgroups... your "conjecture" in + https://teythoon.cryptobitch.de/posts/what-will-i-do-next-cgroupfs-o/ is + incorrect + cgroups does not give us the interfaces that upstart uses to + define service readiness; adding support for cgroups is interesting to + upstart for purposes of resource partitioning, but there's no way to + replace ptrace with cgroups for what we're doing + vorlon: hi and thanks for the fish :) + vorlon: what is it exactly that upstart is doing with ptrace + then? + .,oO( your nick makes me suspicious for some reason... ;) + service readiness, what does that mean exactly? + teythoon: so upstart uses ptrace primarily for determining service + readiness. 
The idea is that traditionally, you know an init script is + "done" when it returns control to the parent process, which happens when + the service process has backgrounded/daemonized; this happens when the + parent process exits + in practice, however, many daemons do this badly + so upstart tries to compensate, by not just detecting that the + parent process has exited, but that the subprocess has exited + (for the case where the upstart job declares 'expect daemon') + cgroups, TTBOMK, will let you ask "what processes are part of this + group" and possibly even "what process is the leader for this group", but + doesn't really give you a way to detect "the lead process for this group + has changed twice" + now, it's *better* in an upstart/systemd world for services to + *not* daemonize and instead stay running in the foreground, but then + there's the question of how you know the service is "ready" before moving + on to starting other services that depend on it + systemd's answer to this is socket-based activation, which we + don't really endorse for upstart for a variety of reasons + hm, okay + so upstart does this only if expect daemon is declared in the + service description? + (in part because I've seen security issues when playing with the + systemd implementation on Fedora, which Lennart assures me are + corner-cases specific to cups, but I haven't had a chance to test yet + whether he's right) + and it is not used to track children, but only to observe the + daemonizing process? + yes + and it then detaches from the processes? + yes + once it knows the service is "ready", upstart doesn't care about + tracking it; it'll receive SIGCHLD when the lead process dies, and that's + all it needs to know + ok, so I misunderstood the purpose of the ptracing, thanks for + clarifying this + my pleasure :) + I realize that doesn't really help with the problem of hurd not + having ptrace + no, but thanks anyway + fwiw, the alternative upstart recommends for detecting service + readiness is for the process to raise SIGSTOP when it's ready + doesn't require ptracing, doesn't require socket-based activation + definitions; does require the service to run in a different mode than + usual where it will raise the signal at the correct time + right, but that requires patching it, same as the socket + activation stuff of systemd + (this is upstart's 'expect stop') + yes + though at DebConf, there were some evil ideas floating around + about doing this with an LD_PRELOAD or similar ;) + (overriding 'daemonize') + er, 'daemon()' + ^^ + and hey, what's suspicious about my /nick? vorlons are always + trustworthy + ;) + sure they are + but could this functionality be reasonably #ifdef'ed out for a + proof of concept port? + hmm, you would need to implement some kind of replacement... if + you added cgroups support to upstart as an alternative + that could work + i.e., you would need upstart to know when the service has exited; + if you aren't using ptrace, you don't know the "lead pid" to watch for, + so you need some other mechanism --> cgroups + and even then, what do you do for a service like openssh, which + explicitly wants to leave child processes behind when it restarts? + right... + oh, I was hoping you knew the answer to this question ;) Since + AFAICS, openssh has no native support for cgroups + >,< I don't, but I'll think about what you've said... gotta go, + catch what's left of the summer ;) + fwiw I consider fork/exec/the whole daemonizing stuff fubar... 
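
As a concrete illustration of the readiness protocols discussed above,
here is a minimal sketch (not actual upstart or service code) of the
`expect stop` convention: the service does not daemonize at all, it simply
raises `SIGSTOP` once initialization is complete, and the supervising init
system, seeing the stop, resumes it with `SIGCONT`:

    /* Sketch of a service using the "expect stop" readiness protocol.
       Illustrative only; error handling omitted.  */
    #include <signal.h>
    #include <unistd.h>

    int
    main (void)
    {
      /* ... bind sockets, load configuration, etc. ... */

      /* Signal readiness: the supervisor sees the child stop
         (e.g. via waitpid with WUNTRACED) and sends SIGCONT.  */
      raise (SIGSTOP);

      /* ... main service loop ... */
      for (;;)
        pause ();
    }

This needs neither ptrace-based fork tracking nor socket activation, but,
as noted above, it does require teaching the service about the protocol.
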
+ see you around :) + later :) + + +## IRC, OFTC, #debian-hurd, 2013-09-07 + + vorlon: I thought about upstarts use of ptrace for observing the + daemonizing process and getting hold of the child + vorlon: what if cgroup(f)s would guarantee that the order of + processes listed in x/tasks is the same they were added in? + vorlon: that way, the first process in the list would be the + daemonized child after the original process died, no? + teythoon: that doesn't tell you how many times the "lead" process + has changed, however + you need synchronous notifications of the forks in order to know + that, which currently we only get via ptrace + + +## IRC, OFTC, #debian-hurd, 2013-09-08 + + vorlon: ok, but why do the notifications have to be synchronous? + does that imply that the processes need to be stopped until upstart does + something? + teythoon: well, s/synchronous/reliable/ + you're right that it doesn't need to be synchronous; but it can't + just be upstart polling the status of the cgroup + because processes may have come and gone in the meantime + vorlon: ok, cool, b/c the notifications of process changes I'm + hoping to introduce into the proc server for my cgroupfs do carry exactly + this kind of information + cool + are you discussing an API for this with the Linux cgroups + maintainers? + otoh it would be somewhat "interesting" to get upstart to use + this b/c of the way the mach message handling is usually implemented + :) + no, I meant in order for me to be able to implement cgroupfs I + had to create these kind of notifications, it's not an addition to the + cgroups api + is upstart multithreaded? + no + threads are evil ;) + ^^ I mostly agree + it uses a very nice event loop, leveraging signalfd among other + things + uh oh, signalfd sounds rather Linuxish + it is + I think xnox mentioned when he was investigating it that kfreebsd + now also supports it + but yeah, AFAIK it's not POSIX + it isn't, yes + but it darn well should be + :) + it's the best improvement to signal handling in a long time + systemd also uses signalfd + umm, it seems I was wrong about Hurd not having ptrace, the wiki + suggests that we do have it + FSVO "have" + ^^ + vorlon: teythoon: so ok kFreeBSD/FreeBSD ideally I'd be using + EVFILT_PROC from kevent which allows to receive events & track: exit, + fork, exec, track (follow across fork) + upstart also uses waitid() + so a ptrace/waitid should be sufficient to track processes, if Hurd + has them. + + +## IRC, freenode, #hurd, 2013-09-09 + + teythoon: yes, the shutdown notifications do stall the process + but no more than a minute, or so + teythoon: btw, did you end up understanding the odd thing in + fshelp_start_translator_long? + I haven't had the time to have a look + youpi: what odd thing? the thing about being implemented by hand + instead of using the mig stub? + the thing about the port being passed twice + XXX this looks wrong to me, please have a look + in the mach_port_request_notification call + ah, that was alright, yes + ok + so I can drop it from my TODO :) + this is done on the control port so that a translator is + notified if the "parent" translator dies + was that in fshelp_start_translator_long though? I thought that + was somewhere else + that's what the patch file says + +++ b/libfshelp/start-translator-long.c + @@ -293,6 +293,7 @@ fshelp_start_translator_long (fshelp_open_fn_t + underlying_open_fn, + + /* XXX this looks wrong to me, bootstrap is used twice as + argument... 
*/ + bootstrap, + MACH_NOTIFY_NO_SENDERS, 0, + right + I remember that when I got a better grip of the idea of + notifications I figured that this was indeed okay + I'll have a quick look though + ok + ah, I remember, this notifies the parent translator if the child + dies, right + and it is a NO_SENDERS notification, so it is perfectly valid to + use the same port twice, as we only hold a receive right + + +## IRC, freenode, #hurd, 2013-09-10 + + braunr: are pthreads mapped 1:1 to mach threads? + teythoon: yes + I'm reading the Linux cgroups "documentation" and it talks about + tasks (Linux threads) and thread group IDs (Linux processes) and I'm + wondering how to map this accurately onto Hurd concepts... + apparently on Linux there are PIDs/TIDs that can be used more or + less interchangeably from userspace applications + the Linux kernel however knows only PIDs, and each thread has + its own, and those threads belonging to the same (userspace) PID have the + same thread group id + aiui on Mach threads belong to a Mach task, and there is no + global unique identifier exposed for threads, right? + braunr: ^ + teythoon: There is its thread port, which in combination with + its task port should make it unique? (I might be missing context.) + Eh, no. The task port's name will only locally be unique. + * tschwinge confused himself. + tschwinge, braunr: well, the proc server could of course create + TIDs for threads the same way it creates PIDs for tasks, but that should + probably wait until this is really needed + for the most part, the tasks and cgroup.procs files contain the + same information on Linux, and not differentiating between the two just + means that cgroupfs is not able to put threads into cgroups, just + processes + that might be enough for now + + +## IRC, freenode, #hurd, 2013-09-11 + + ugh, some of the half-backed Linux interfaces will be a real + pain in the ass to support + they do stuff like write(2)ing file descriptors encoded as + decimal numbers for notifications :-/ + teythoon: for cgroup ? + braunr: yes, they have this eventfd based notification mechanism + braunr: but I fear that this is a more general problem + do we need eventfd ? + I mean passing FDs around is okay, we can do this just fine with + ports too, but encoding numbers as an ascii string and passing that + around is just not a nice interface + so what ? + it's not a designed interface, it's one people came up with b/c + it was easy to implement + if it's meant for compatibility, that's ok + how would you implement this then? as a special case in the + write(2) implementation in the libc? that sounds horrible but I do hardly + see another way + ok, some more context: the cgroup documentation says + write " " to cgroup.event_control. + where event_fd is the eventfd the notification should be sent to + theorically they could have used sendmsg + a custom payload + control_fd is an fd to the pseudo file one wants notifications + for + yes, they could have, that would have been nicer to implement + but this... + + +## IRC, freenode, #hurd, 2013-09-12 + + ugh, gnumachs build system drives me crazy %-/ + oh there's worse than that + I added a new .defs file, did as Makerules.mig.am told me to do, + but it still does not create the stubs I need + teythoon: gnumach doesn't + teythoon: glibc does + well, gnumach only creates the stubs it needs + teythoon: you should perhaps simply use gnumach.defs + braunr: sure it does, e.g. vm/memory_object_default.user.c + teythoon: what are you trying to add ? 
+ braunr: I was trying to add a notification mechanism for new + tasks + b/c now the proc server has to query all task ports to discover + newly created tasks, this seems wasteful + also if the proc server could be notified on task creation, the + parent task is still around, so the notification can carry a reference to + it + that way gnumach wouldn't have to track the relationship, which + would create all kind of interesting questions, like whether tasks would + have to be reparented if the parent dies + teythoon: notifications aren't that simple either + y not? + 1/ who is permitted to receive them + 2/ should we contain them to hurd systems ? (e.g. should a subhurd + receive notifications concerning tasks in other hurd systems ?) + that's easy imho. 1/ a single process that has a host_priv + handle is able to register for the notifications once + what are the requirements so cgroups work as expected concerning + tasks ? + teythoon: a single ? + i.e. the first proc server that starts + then how will subhurd proc servers work ? + 2/ subhurds get the notifications from the first proc server, + and only those that are "for them" + ok + i tend to agree + this removes the ability to debug the main hurd from a subhurd + this way the subhurds proc server doesn't even have to have the + host_priv porsts + yes, but I see that as a feature tbh + me too + and we can still debug the subhurd from the main + it still works the other way around, so it's still... + yes + what would you include in the notification ? + a reference to the new task (proc needs that anyway) adn one to + the parent task (so proc knows what the parent process is and/or for + which subhurd it is) + ok + 17:21 < braunr> what are the requirements so cgroups work as + expected concerning tasks ? + IOW, why is the parental relation needed ? + (i don't know much about the details of cgroup) + well, currently we rely on proc_child to build this relation + but any task can use task_create w/o proc_child + until one claims a newly created task with proc_child, its + parent is pid 1 + that's about the hurd + i'm rather asking about cgroups + ah + the child process has to end up in the same cgroup as the parent + does a cgroup include its own pid namespace ? + not quite sure what you mean, but I'd say no + do you mean pid namespace as in the Linux sense of that phrase? + cgroups group processes(threads) into groups + on Linux, you can then attach controllers to that, so that + e.g. scheduling decisions or resource restrictions can be applied to + groups + braunr: http://paste.debian.net/38950/ + teythoon: ok so a cgroup is merely a group of processes supervised + by a controller + for resource accounting/scheudling + teythoon: where does dev_pager.c do the same ? + braunr: yes. w/o such controllers cgroups can still be used for + subprocess tracking + braunr: well, dev_pager.c uses mig generated stubs from + memory_object_reply.defs + ah memory_object_reply ok + teythoon: have you tried adding it to EXTRA_DIST ? 
+ although i don't expect it will change much + teythoon: hum, you're not actually creating client stubs + create a kern/task_notify.cli file + as it's done with device/memory_object_reply.cli + see #define KERNEL_USER 1 + braunr: right, thanks :) + + +## IRC, freenode, #hurd, 2013-09-13 + + hm, my notification system for newly created tasks kinda works + as in I get notified when a new task is created + but the ports for the new task and the parent that are carried + in the notification are both MACH_PORT_DEAD + do I have to add a reference manually before sending it? + that would make sense, the mig magic transformation function for + task_t consumes a reference iirc + ah yes + that reference counting stuff is some hell + braunr: ah, there's more though, the mig transformations are + only done in the server stub, not in the client, so I still have to + convert_task_to_port myself afaics + awesome, it works :) + :) + ugh, the proc_child stuff is embedded deep into libc and signal + handling stuff... + "improving" the child_proc stuff with my shiny new notifications + wrecks havoc on the system + + # Required Interfaces In the thread starting diff --git a/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.mdwn b/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.mdwn index cf41550d..7159551d 100644 --- a/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.mdwn +++ b/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -16,26 +16,37 @@ License|/fdl]]."]]"""]] IRC, unknown channel, unknown date: - azeem, marcus: ext2fs.static: thread-cancel.c:55: hurd_thread_cancel: Assertion '! __spin_lock_locked (&ss->critical_section_lock)' failed + azeem, marcus: ext2fs.static: thread-cancel.c:55: + hurd_thread_cancel: Assertion '! __spin_lock_locked + (&ss->critical_section_lock)' failed I actually don't understand this assertion it's just before __spin_lock (&ss->critical_section_lock); why should one check that a lock is free before taking it ? just the same in hurdexec.c - (no, ss is not our own sigstate, so it's not safe to assume no other path can take it) + (no, ss is not our own sigstate, so it's not safe to assume no + other path can take it) there's another one in sysdeps/mach/hurd/spawni.c and jmp-unwind.c - youpi: why do you think it's nonsense?... the fact that we take the lock (so we can't be interrupted) doesn't mean we are willing to wait for others to release the lock... maybe the code path should never be reached while others have a lock, or something + youpi: why do you think it's nonsense?... the fact that we take + the lock (so we can't be interrupted) doesn't mean we are willing to wait + for others to release the lock... maybe the code path should never be + reached while others have a lock, or something then it's useless to take the lock - "we take the lock (so we can't be interrupted)": no, it's not _our_ lock here, it's the lock of the thread we want to cancel - what exactly is cancelling a thread?... 
(sorry, I don't really have experience with thread programming) + "we take the lock (so we can't be interrupted)": no, it's not _our_ + lock here, it's the lock of the thread we want to cancel + what exactly is cancelling a thread?... (sorry, I don't really + have experience with thread programming) ~= killing it - well, we take the lock so nobody can mess with the thread while we are cancelling it, no?... + well, we take the lock so nobody can mess with the thread while we + are cancelling it, no?... yes that is fine - but checking that the lock is free before taking it doesn't make sense + but checking that the lock is free before taking it doesn't make + sense why nobody should be able to take the lock ? - and if nobody is, why do we take it ? (since nobody would be able to take it) - well, maybe after taking the lock, we do some action that might result in others trying to take it... + and if nobody is, why do we take it ? (since nobody would be able + to take it) + well, maybe after taking the lock, we do some action that might + result in others trying to take it... nope: look at the code :) or maybe the cancel_hook, but I really doubt it - diff --git a/open_issues/time.mdwn b/open_issues/time.mdwn index becb88b0..367db872 100644 --- a/open_issues/time.mdwn +++ b/open_issues/time.mdwn @@ -11,6 +11,11 @@ License|/fdl]]."]]"""]] [[!tag open_issue_porting]] +[[!toc]] + + +# `time` + Neither the `time` executable from the GNU time package work completely correctly, nor does the GNU Bash built-in one. @@ -56,20 +61,20 @@ As above; also here all the running time should be attributed to *user* time. This is probably a [[!taglink open_issue_gnumach]]. -# 2011-09-02 +## 2011-09-02 Might want to revisit this, and take Xen [[!tag open_issue_xen]] into account -- I believe flubber has already been Xenified at that time. -## IRC, freenode, #hurd, 2011-09-02 +### IRC, freenode, #hurd, 2011-09-02 While testing some [[performance/IPC_virtual_copy]] performance issues: And I can confirm that with dd if=/dev/zero of=/dev/null bs=4k running, a parallel sleep 10 takes about 20 s (on strauss). -# 2013-03-30/31 +## 2013-03-30/31 Investigating time's `configure`, a difference of the output between Linux and Hurd shows: @@ -81,3 +86,754 @@ This causes a different code path in `resuse.c` to be used; such code path does not get a define for `HZ`, which is then defined with a fallback value of 60. [[!debbug 704283]] has been filed with a fix for this no-wait3 case. + + +# `times` + +## guile + +### IRC, freenode, #hurd, 2013-08-21 + + does guile2 on hurd fixed? times issue + nalaginrut: does not look good + scheme@(guile-user)> (times) + $1 = #(0 0 0 0 0) + well, seems not a fixed version, if there's fixed version + since it's not Guile's bug, I can do nothing for it + ah + in spite of this, Guile2 works I think + all tests passed but 2 fail + one of the failure is version shows "UNKNOWN" which is + trivials + well, did you try to fix the times issue in Hurd? + I didn't , I have to get more familiar with hurd first + I'm playing hurd these days + :) + anyway, I think times issue is beyond my ability at present + ;-P + times is implemented in the glibc, in sysdeps/mach/hurd/times.c + don't say that before you had a look + yes, you're right + but I think times has something to do with the kernel time + mechanism, dunno if it's related to the issue + how did you get the times.c under hurd? + apt-get source glibc? 
+ well, I'd clone git://sourceware.org/git/glibc.git + and yes, the kernel is involved + task_info is used to obtain the actual values + + http://www.gnu.org/software/hurd/gnumach-doc/Task-Information.html + I'd guess that something fails, but the times(2) interface is + not able to communicate the exact failure + maybe it's not proper to get src from upstream git? since it's + OK under Linux which uses it too + but apt-get source glibc has nothing + so I would copy the times(2) implementation from the libc so + that you can modify it and run it as a standalone program + well, the libc has system dependent stuff, times(2) on Linux is + different from the Hurd version + it has to be + alright, I got what you mean ;-) + and the debian libc is built from the eglibc sources, so the + source package is called eglibc iirc + ah~I'll try + have you tried to rpctrace your times test program? the small c + snippet you posted the other day? + I haven't build all the tools & debug environment on my hurd + ;-( + what tools? + well, I don't even have git on it, and I'm installing but + speed is slow, I'm looking for a new mirror + ah well, no need to do all this on the Hurd directly + building the libc takes like ages anyway + oops ;-) + I'll take your advice to concentrate on times.c only + oh well, it might be difficult after all, not sure though + times sends two task_info messages, once with TASK_BASIC_INFO, + once with TASK_THREAD_TIMES_INFO + here is the relevant rpctrace of your test program: + task131(pid14726)->task_info (1 10) = 0 {0 25 153427968 643072 0 + 0 0 0 1377065590 570000} + task131(pid14726)->task_info (3 4) = 0 {0 0 0 10000} + ok, I don't know enough about that to be honest, but + TASK_THREAD_TIMES_INFO behaves funny + I put a sleep(1) into your test program, and if I rpctrace it, + it behaves differently o_O + * nalaginrut is reading task-information page to get what it could be + maybe I have to do the same steps under Linux to find some + clue + no, this is Mach specific, there is no such thing on Linux + on Linux, times(2) is a system call + on Hurd, times is a function implemented in the libc that + behaves roughly the same way + OK~so different + look at struct task_basic_info and struct task_thread_times_info + in the task-information page for the meaning of the values in the + rpctrace + yes, very + nalaginrut: you may want to try a patch i did but which is still + waiting to be merged in glibc + braunr: ah~thanks for did it ;-) + can I have the link? + i'm getting it + teythoon: funny things happen with rpctrace, that's expected + keep in mind rpctrace doesn't behave like ptrace at all + it acts as a proxy + nalaginrut: + http://git.savannah.gnu.org/cgit/hurd/glibc.git/commit/?h=rbraun/getclktck_100_hz&id=90404d6d1aa01f6ce1557841f5a675bb6a30f508 + nalaginrut: you need to add it to the debian eglibc patch list, + rebuild the packages, and install the resulting .debs + if you have trouble doing it, i'll make packages when i have time + braunr: I think your test result is expected? ;-) + what test result ? + times test under that patch + yes + but i have no idea if it will work + my patch fixes a mismatch between glibc and the procfs server + nothing more + it may help, it may not, that's what i'd like to know + hah~thanks for that + I get source from apt-get, then manually modified the files, + no much code ;-) + compiling + there is no cpuinfo in /proc? + no + a feature need to be done? or there's another way for that? + well, it hasn't been implemented + do you need that? 
what for? + compiling error, I realized I should use gcc-4.7 + how are you building? + I just happened to play proc while compiling, and found + there's no + cxa_finalize.c:48:1: error: ‘tcbhead_t’ has no member + named ‘multiple_threads’ + I changed to gcc-4.7 + just edit the sources, and then dpkg-buildpackage -nc -us -uc + that will rebuild the debian package as it would be in a debian + build, making sure all the build dependencies are there, etc + doing it different than that is just wrong™ + ok, doing + were you really doing ./configure etc yourself? + well, I can't wait till it's done, I'll let it compile and + check it out tomorrow + I used configure, yes ;-P + not good + I have to go, thanks for help guys + + +### IRC, freenode, #hurd, 2013-08-22 + + < nalaginrut> eglibc was done by dpkg-buildpackage, then how to install it? + (sorry I'm a brand new debian users) + < nalaginrut> oh~I found it + < nalaginrut> yes, (times) returns reasonable result ;-) + * nalaginrut is trying 'make check' + < nalaginrut> unfortunately, it can't pass the test though, I'm researching + it, anyway, we made first step + < nalaginrut> for Hurd internal-time-units-per-second will be 1000 + < nalaginrut> , but the elapsed time is far larger than (* 2 + internal-time-units-per-second) + < nalaginrut> I think the different of two returned clocks after 1 second + should be the TIME_UNITS_PER_SECOND, in principle + < nalaginrut> but I'm not sure if it's elibc or Guile bug + < nalaginrut> dunno, maybe clock tick should be 1000? + < nalaginrut> well, I'll try clock per second as 1000 + < braunr> nalaginrut: clock tick (or actually, the obsolete notion of a + clock tick in userspace) should be 100 + < braunr> nalaginrut: how did you come with 1000 ? + < nalaginrut> braunr: Guile set TIME_UNITS_PER_SECOND to 1000 when there's + no 8bytes size and doesn't define HAVE_CLOCK_GETTIME + < nalaginrut> #if SCM_SIZEOF_LONG >= 8 && defined HAVE_CLOCK_GETTIME + < nalaginrut> #define TIME_UNITS_PER_SECOND 1000000000 + < nalaginrut> #else + < nalaginrut> #define TIME_UNITS_PER_SECOND 1000 + < nalaginrut> #endif + < nalaginrut> and the test for 'times' used time-units-per-second + < pinotree> what has sizeof(long) have to do with time units per second? + < nalaginrut> dunno, maybe the representation of time? + < nalaginrut> the test failed since the difference between two clocks after + 1sec is too large + < nalaginrut> and for the test context, it should small than 2 times of + units-per-second + < nalaginrut> should be smaller + < nalaginrut> sorry for bad English + < pinotree> aren't you basically looking for clock_getres? + < nalaginrut> pinotree: I don't understand what you mean + < pinotree> + http://pubs.opengroup.org/onlinepubs/9699919799/functions/clock_getres.html + < nalaginrut> I wonder if there's a standard CLK_PER_SEC for Hurd + < nalaginrut> or it can be modified as wish + < pinotree> why do you need it? + < nalaginrut> the difference is 10,000,000, which can never less than + 2*clock_per_second + < nalaginrut> pinotree: I don't need it, but I want to know if there's a + standard value + < braunr> nalaginrut: ok so, this is entirely a guile thing + < braunr> nalaginrut: did you test with my patch ? + < nalaginrut> braunr: yes, 'times' works fine + < braunr> but even with that, a tets fails ? 
+ < braunr> test* + < nalaginrut> well, I can't say works fine, the proper description is "now + it has reasonable result" + < braunr> youpi: could you bring + http://darnassus.sceen.net/gitweb/savannah_mirror/glibc.git/commit/90404d6d1aa01f6ce1557841f5a675bb6a30f508 + into debian glibc btw ? + < nalaginrut> braunr: it failed the test since the clock run too fast, but + it should be smaller than 2*clk-per-sec + < braunr> i don't get that + < braunr> can you show the code that checks the condition ? + < nalaginrut> braunr: http://pastebin.com/sG3QxnPt + < braunr> * 0.5 internal-time-units-per-second ? + < nalaginrut> for C users, it's just like + a=times(...);sleep(1);b=times(...); then time-units-per-sec/2 <= (b-a) <= + time-units-per-sec*2 + < braunr> ah ok + < nalaginrut> the test passes when it's true + < braunr> so basically, it says sleep(1) sleeps for more than 2 seconds + < braunr> can you check the actual value ? + < braunr> b-a + < nalaginrut> hold on for minutes + < nalaginrut> it's 10,000,000 + < nalaginrut> for clk-per-sec=1000,000,000, it's OK + < nalaginrut> but for 100 or 1000, it's too small + < braunr> let's forget 100 + < braunr> guile uses 1000 + < nalaginrut> OK + < braunr> but i still don't get why + < nalaginrut> so I asked if there's standard value, or it can be ajustified + < nalaginrut> adjusted + < braunr> ok so, times are expressed in clock ticks + < braunr> are you sure you're using a patched glibc ? + < nalaginrut> yes I used your patch, and the 'times' get reasonable result + < braunr> then + < braunr> 11:28 < nalaginrut> it's 10,000,000 + < braunr> doesn't make sense + < nalaginrut> hmm + < braunr> anhd i don't understand the test + < braunr> what's tms:clock new ? + < nalaginrut> it's actually the return value of 'times' + < nalaginrut> Guile wrap the clock_t and tms to a vector, then we can get + all the thing in a row + < nalaginrut> 'new' is a variable which was gotten after 1 sec + < braunr> let's see what this does exactly + < nalaginrut> equal to "new = times(...)" + < nalaginrut> 'tms' equal to (clock_t (struct tms)) + < nalaginrut> we have to pass in the struct pointer to get the struct + values filled, but for Guile we don't use pointer, times actually returns + two things: clock_t and struct tms + < nalaginrut> and Guile returns them as a vector in a row, that's it + < braunr> nalaginrut: test this please: + http://darnassus.sceen.net/~rbraun/test.c + < braunr> i don't have a patched libc here + < braunr> i'll build one right now + < nalaginrut> clock ticks: 1000000 + < braunr> and this seems reasonable to you ? + < braunr> anyway, i think the guile test is bugged + < nalaginrut> no, the reasonable is not for this + < braunr> does it ever get the clock tick value from sysconf() ? + < nalaginrut> I say reasonable since it's always 0 both for clock and tms, + before apply your patch + < braunr> uh no + < braunr> i have the same value, without my patch + < nalaginrut> so I said "I can't say it works fine" + < braunr> either the test is wrong because it doesn't use sysconf() + < nalaginrut> anyway, I don't think times should return "all zero" + < braunr> or the clock values have already been ocnverted + < braunr> but it doesn't + < braunr> you did something wrong + < nalaginrut> with your patch it doesn't + < braunr> without neither + < braunr> 11:43 < braunr> i have the same value, without my patch + < nalaginrut> well, it's too strange + < braunr> check how the test actually gets the clock values + < braunr> also, are your running in vbox ? 
+ < braunr> you* + < nalaginrut> no ,it's physical machine + < braunr> oh + < braunr> nice + < braunr> note that vbox has timing issues + < nalaginrut> I thought I should give you some info of CPU, but there's no + /proc/cpuinfo + < braunr> shouldn't be needed + < nalaginrut> OK + < braunr> run my test again with an unpatched glibc + < braunr> just to make sure it produces the same result + < braunr> and + < nalaginrut> so the clock-per-sec is machine independent for Hurd I think + < braunr> 11:46 < braunr> check how the test actually gets the clock values + < nalaginrut> since it's implemented in userland + < braunr> clock-per-sec is always system dependent + < braunr> All times reported are in clock ticks. + < braunr> The number of clock ticks per second can be obtained + using: + < braunr> sysconf(_SC_CLK_TCK); + < braunr> 11:46 < braunr> check how the test actually gets the clock values + < braunr> to see if they're converted before reaching the test code or not + * nalaginrut is building eglibc + < braunr> building ? + < braunr> what for ? + < nalaginrut> I modified it to 1000, now it's useless + < braunr> we want it to 100 either way + < nalaginrut> and how to reinstall eglibc under debian? + < braunr> it's obsolete, procfs already uses 100, and 100 is low enough to + avoid overflows in practically all cases + < braunr> aptitude install libc0.3= + < nalaginrut> OK + < braunr> aptitude show -v libc0.3 + < braunr> for the list of available versions + < nalaginrut> out of topic, what's the meaning of the code in + quantize_timeval ? + < nalaginrut> tv->tv_usec = ((tv->tv_usec + (quantum - 1)) / quantum) * + quantum; + < nalaginrut> I can't understand this line + < braunr> scaling and rounding i guess + < nalaginrut> hmm...but quantum seems always set to 1? + < nalaginrut> 100/__getclktck() + < braunr> ah right + < braunr> old crap from the past + < nalaginrut> and clk-tck is 100 + < braunr> the author probably anticipated clk_ticks could vary + < braunr> in practice it doesn't, and that's why it's been made obsolete + < nalaginrut> I wonder if it could be vary + < braunr> no + < nalaginrut> alright + < nalaginrut> why not just assign it to 1? 
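
For reference, the `quantize_timeval` line quoted above is the usual
integer idiom for rounding a value up to the next multiple of `quantum`;
with `quantum == 1` (100 divided by a clock tick of 100, as noted above)
it is a no-op, which is why it looks pointless today.  A small stand-alone
illustration with a hypothetical helper name:

    /* Round X up to the next multiple of QUANTUM, the idiom used in
       quantize_timeval.  Illustrative sketch only.  */
    #include <assert.h>

    static long
    round_up (long x, long quantum)
    {
      return ((x + (quantum - 1)) / quantum) * quantum;
    }

    int
    main (void)
    {
      assert (round_up (123456, 1) == 123456);      /* quantum 1: unchanged  */
      assert (round_up (123456, 10000) == 130000);  /* rounded up  */
      return 0;
    }
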
+ < braunr> 11:55 < braunr> old crap from the past + < braunr> the hurd is 20 years old + < braunr> like linux + < nalaginrut> oh~ + < braunr> but with a lot less maintenance + < nalaginrut> braunr: well, I tried the original eglibc, your test was + clock ticks: 1000000 + < nalaginrut> but in Guile, (times) ==> (0 0 0 0 0) + < nalaginrut> the reasonable result maybe: #(4491527510000000 80000000 0 0 + 0) + < braunr> 11:46 < braunr> check how the test actually gets the clock values + < braunr> ah, he left + + +### IRC, freenode, #hurd, 2013-08-23 + + < braunr> nalaginrut: times() doesn't seem to be affected by my patch at + all + < nalaginrut> braunr: but it did in my machine + < nalaginrut> well, I think you mean it doesn't affect your C test code + < braunr> i'm almost sure something was wrong in your test + < braunr> keep using the official debian glibc package + < nalaginrut> I don't think it's test issue, since every time (times) + return zero, the test can never get correct result + < braunr> times doesn't return 0 + < braunr> for sleep(1), i always have the right result, except in + microseconds + < nalaginrut> times in Guile always return #(0 0 0 0 0) + < braunr> (microseconds is the native mach time unit) + < braunr> well, guile does something wrong + < nalaginrut> after sleep 1, it's 0 again, so it's none sense + < braunr> 11:46 < braunr> check how the test actually gets the clock values + < braunr> not on my system + < nalaginrut> but (times) returns reasonable result after applied your + patch + < braunr> that's not normal, since times isn't affected by my patch + < nalaginrut> oops + < braunr> you need to look for what happens in guile between the times() + call and the #(0 0 0 0 0) values + < nalaginrut> well, I tried many times between patch or non-patch, I think + there's no mistake + < nalaginrut> I read the 'times' code in Guile, there's nothing strange, + just call 'times' and put all the result to a vector + < braunr> which means there is no conversion + < braunr> in which case the test is plain wrong since there MUST also be a + call to sysconf() + < braunr> to obtain the right clock ticks value + < braunr> is your box reachable with ssh ? + < nalaginrut> oh~wait, seems there's a quotient operation, I'm checking + < nalaginrut> factor = scm_quotient (scm_from_long (TIME_UNITS_PER_SECOND), + < nalaginrut> scm_from_long (ticks_per_second)); + < braunr> iirc, TIME_UNITS_PER_SECOND is hardcoded + < nalaginrut> unless factor is zero + < nalaginrut> yes, it's hardcoded + < braunr> that's completely non portable and wrong + < nalaginrut> you suggest to call sysconf? + < braunr> yes + < braunr> but i don't have the code in mind + < braunr> what is ticks_per_second ? + < nalaginrut> OK, that's one issue, we have to find why times return 0 + < braunr> 14:14 < braunr> is your box reachable with ssh ? + < braunr> i'd like to make sure times returns 0 at your side + < braunr> because it doesn't at mine + < nalaginrut> no + < braunr> until i can reproduce, i can't consider there is a problem + < nalaginrut> I think it's unreachable for outer space + < nalaginrut> well, if you want to reproduce, just get guile src of debian + < braunr> guile 2.0 ? + < nalaginrut> yes, apt-get source guile-2.0 + < nalaginrut> I'm checking ticks_per_second + < braunr> got the source, how do i test + < braunr> ? 
+ < nalaginrut> you have to build it, and run ./meta/guile, then you don't + have to install it + < nalaginrut> and try (times) + < braunr> aw libgc + < nalaginrut> the reasonable result should be #(4313401920000000 110000000 + 20000000 0 0) or something alike + < nalaginrut> but #(0 0 0 0 0) in each time is not reasonable apparently + < nalaginrut> maybe you need apt-get build-dep guile-2.0? + < braunr> already done + < nalaginrut> building Guile2 may take very long time + < nalaginrut> about 30 minutes in my old machine + < braunr> then it should take just a few minutes on mine + < nalaginrut> alright it's not very long, I've spent 8 hours for gcc in LFS + < braunr> 8 hours ? + < braunr> takes 5-10 minutes on a common machine .. + < nalaginrut> but it's Celeron566 at that time... + < braunr> ah, that old + < nalaginrut> include bootstrap, so very long + < braunr> nalaginrut: i got the test failure from the build procedure, how + do i run it manually ? + < nalaginrut> braunr: ./meta/guile -L test-suite + test-suite/tests/time.test + < nalaginrut> braunr: or make check for all + < braunr> put a print after the schedule() and before the return nil; in + runtime_mstart, since that's the body of new threads + < nlightnfotis> unfortunately, I can't confirm this with goroutines + running; the assertion failure aborts before I can get anything useful + < braunr> you can + < braunr> make sure there is a \n in the message, since stdout is line + buffered by default + < braunr> if you don't reach that code, it means threads don't exit + < braunr> at least goroutine threads + < braunr> btw, where is the main thread running ? + < nlightnfotis> I just checked there is a \n at the end. + < nlightnfotis> " btw, where is the main thread running " could you + elaborate a little bit on this? + < braunr> what does main() after initializing the runtime ? + < braunr> +do + < nlightnfotis> the runtime main or the process's main? + < braunr> the process + < braunr> nlightnfotis: what we're interested in is knowing whether main() + exits or not + < nlightnfotis> braunr: I can see there are about 4 functions of interest: + runtime_main (the main goroutine, and I can imagine 1st thread) + < nlightnfotis> main_init (I don't know what it does, will check this out + now) + < nlightnfotis> main_main (not sure about this one either) + < nlightnfotis> and runtime_exit (0) + < braunr> i can see that too + < braunr> i'm asking about main() + < nlightnfotis> which seems to be the function that terminates the main + thread + < nlightnfotis> nlightnfotis: what we're interested in is knowing + whether main() exits or not --> my theory is runtime_exit (0) exits the + process' main. Seeing as at various times go programs echo $? == 0. + < nlightnfotis> let me research that a little bit + < nlightnfotis> braunr: that will require a bit more studying. main_main() + and main_init() are both expanded to assembly tags if I understand it + correctly. + < nlightnfotis> main.main and __go_init_main respectively. + < braunr> why are you looking from there instead of looking from main() ? + < nlightnfotis> are we not looking out if main exits? + < braunr> we are + < braunr> so why look at main_main ? + < braunr> or anything else than main ? + < nlightnfotis> these are called inside runtime_main and I figured out they + might have a clue + < braunr> runtime_main != main + < braunr> (except if there is aliasing) + < nlightnfotis> there is still the possibility that runtime_main is the + main function and that runtime_exit(0) exits it. 
+ < braunr> there is no doubt that main is main + < braunr> (almost) + < nlightnfotis> and I just found out that there is no main in assembly + produced from go. Only main.main + < braunr> check the elf headers for the entry point then + < nlightnfotis> braunr: I went through the headers, and found the process' + main. You can find it in /libgo/runtime/go-main.c + < nlightnfotis> it seems very strange though: It creates a new thread, then + aborts? + < braunr> nlightnfotis: see :) + < braunr> nlightnfotis: add traces there + < nlightnfotis> braunr: can you look into that piece of code to check out + something I don't understand? + < nlightnfotis> braunr: I can not seem able to find __go_go 's definition + < nlightnfotis> only a declaration in runtime.h + < braunr> + https://github.com/NlightNFotis/gcc/blob/master/libgo/runtime/proc.c, + line 1552 + < nlightnfotis> gee thanx. For a strange kind of fashion, I was looking for + it in runtime.c + < braunr> use git grep + < braunr> or tags/cscope + < nlightnfotis> braunr: yep! runtime_exit does seem to terminate a go + process that was not otherwise abnormally terminated. + < braunr> ? + < braunr> is it called or not ? + < braunr> runtime_exit is a macro on exit() + < braunr> so we already know what it does + < nlightnfotis> it is called + < braunr> ok + < braunr> that's not normal :) + < nlightnfotis> for a simple program + < braunr> uh ? + < nlightnfotis> for one that has a go routine + < braunr> but + < nlightnfotis> it doesn't + < nlightnfotis> it's expected + < braunr> ok + < braunr> that makes sense + < braunr> well, trace + < braunr> keep tracing + < braunr> for example in main() + < braunr> is runtime_mstart() actually reached ? + < nlightnfotis> yeah main and runtime_main were my next two targets + < braunr> good + < nlightnfotis> and now I followed your advice and it does compiler much + faster + < braunr> so, it looks like the main thread just becomes a mere kernel + thread + < braunr> running runtime_mstart() and fetching goroutines as needed + < braunr> after your traces, i'd suggest running a small go test program, + with one simple goroutine (doesn't crash right ?) + < braunr> and trace context switching + < braunr> but after the traces + < braunr> one important trace is to understand why runtime_exit gets called + < nlightnfotis> it does crash even with 1 goroutine + < braunr> oh + < braunr> when doesn't it crash ? + < nlightnfotis> when it has 0 goroutines + < nlightnfotis> it works as expected + < nlightnfotis> but anything involving goroutines crashes + < nlightnfotis> and goroutines are very important; everything in the + standard library involves goroutines + < braunr> ok + < braunr> doesn't change what i suggested, good + < braunr> 1/ find out why runtime_exit gets called + < braunr> 2/ trace context switching with 1 goroutine + < nlightnfotis> on it. + < braunr> in all cases, make all your goroutines (including the main one) + *not* return + < braunr> so that you don't deal with goroutine destruction yet + < nlightnfotis> runtime_mstart in main doesn't to be run at all. So the + path is __go_go and then return from it. + < nlightnfotis> *doesn't seem + + +### IRC, freenode, #hurd, 2013-08-26 + + < braunr> youpi: my glibc clock patch looks incomplete btw + < youpi> which one? + < youpi> ah, the ticks one? 
+ < braunr> yes + < braunr> it doesn't change the values returned by times + < braunr> as a side effect, the load average bumps to 2+ on an idle machine + + +### IRC, freenode, #hurd, 2013-08-27 + + < nalaginrut> braunr: have you tried Guile2 on your machine? ;-) + < braunr> nalaginrut: no + < braunr> nalaginrut: but i saw the code actually does use sysconf() + < nalaginrut> braunr: yes, for ticks_per_second + < braunr> i had to look myself to find it out, you didn't say it, despite + me asking multiple times + < braunr> it won't make debugging easier ;p + < braunr> nalaginrut: also, the return value of times is actually *never* + used + < braunr> i don't know why you've been talking about it so much + < nalaginrut> braunr: I'm sorry, it's first time to look stime.c for me + < braunr> the interesting function is get_internal_run_time_times() + < nalaginrut> what do you mean about "the return value of times is actually + *never* used"? in which context? + < braunr> see get_internal_run_time_times + < braunr> struct tms time_buffer; + < braunr> times(&time_buffer); + < braunr> return ... + < braunr> and yes, the user and system time reported in struct tms are 0 + < braunr> let's see what posix has to say about it + < pinotree> it says it will return (clock_t)-1 for errors, but no standard + errors are defined yet + < nalaginrut> but I don't think get_internal_run_time_times has something + to do with scm_times + < braunr> well, i don't see any other call to times() + < braunr> i've asked you repeatedly to look for how guile fetches the data + < braunr> i think it's done in get_internal_run_time_times + < braunr> what makes you think otherwise ? + < braunr> our times() seems to behave fine, other than the units of the + return value + < nalaginrut> I don't understand what do you mean? + get_internal_run_time_times is unrelated to scm_times which is actually + "times" in Scheme code + < braunr> ok + < nalaginrut> I think we're talking about "times" activity, right? + < braunr> ok so result is a vector + < braunr> with the return value and the four values in struct tms + < nalaginrut> yes + < braunr> and what looks interesting is + < braunr> factor = scm_quotient (scm_from_long (TIME_UNITS_PER_SECOND), + scm_from_long (ticks_per_second)); + < braunr> SCM_SIMPLE_VECTOR_SET (result, 0, scm_product (scm_from_long + (rv), factor)); + < braunr> TIME_UNITS_PER_SECOND is 1000 + < nalaginrut> yes, it means (clock_t * + (TIME_UNITS_PER_SECOND/ticks_per_second)), though I've no idea why it + does this + < braunr> normalizing values i guess + < nalaginrut> I wonder if the factor should be 1, just guessing + < braunr> let's see what our clock tick really is + < braunr> 1000000 on an unmodified libc + < braunr> 100 with my patch + < nalaginrut> so what's the problem? 
+ < nalaginrut> all the values were multiplied by ticks, it's fair for the + subtraction + < nalaginrut> I think the problem is clock is too large for the difference + between utime and utime(sleep 1) + < nalaginrut> oops, is too small + < nalaginrut> sorry, I confused, + < nalaginrut> the problem is the difference of clock is too large for + 2*internal-time-units-per-second + < nalaginrut> and actually, internal-time-units-per-second is + SCM_TIME_UNITS_PER_SECOND + < nalaginrut> but without your patch, 'times' would return zeros all the + time, which is never meet the condition: SCM_TIME_UNITS_PER_SECOND/2 <= + (clock2 - clock1) + < nalaginrut> well, maybe your point is + TIME_UNITS_PER_SECOND/ticks_per_second is too small without your patch, + which causes the scm_to_long cast give a 0 value + < nalaginrut> s/cast/casting + < nalaginrut> when ticks_per_second is 100, the factor would be 10, which + seems to be reasonable + < nalaginrut> s/scm_to_long/scm_from_long + < nalaginrut> well, I have to checkout this + < nalaginrut> OK, let me reconstruct the point: ticks_per_second so too + large that makes the factor becomes zero + < nalaginrut> but decrease ticks_per_second to 100 causes the clock become + too large than TIME_UNITS_PER_SECOND + < braunr> 10:59 < nalaginrut> but without your patch, 'times' would return + zeros all the time, which is never meet the condition: + SCM_TIME_UNITS_PER_SECOND/2 <= (clock2 - clock1) + < braunr> until you prove me otherwise, this is plain wrong + < braunr> times() never returned me 0 + < braunr> so let's see, this gives us a factor of 1000 / 1000000 + < braunr> so the problem is factor being 0 + < braunr> that's why *guile* times returns 0 + < braunr> with my patch it should return 10 + < nalaginrut> braunr: I'm sorry I mean "stime" in Scheme returns zeros + < nalaginrut> yes, I think the problem is factor + < nalaginrut> the factor + < braunr> now why doesn't my patch fix it all ? + < braunr> ah yes, rv is still in microseconds + < braunr> that's what i've been telling youpi recently, my patch is + incomplete + < braunr> i'll cook a quick fix, give me a few minutes please + < nalaginrut> but it fixed something ;-) + < braunr> well, guile makes a stupid assumption here + < braunr> so it's not really a fix + < nalaginrut> braunr: should I ask some info about TIME_UNITS_PER_SECOND + from Guile community? + < nalaginrut> or it doesn't help + < braunr> what do you want to ask them ? + < nalaginrut> since I don't know how this value was chosen + < nalaginrut> dunno, I'll ask if you need it + < nalaginrut> I just think maybe you need this info + < braunr> well + < braunr> my plan is to align the hurd on what other archs do + < braunr> i.e. set clk_tck to 100 + < braunr> in which case this won't be a problem any more + < braunr> now you could warn them about the protability issue + < braunr> i'm not sure if they would care though + < nalaginrut> the warning is useful for the future + < nalaginrut> and it's not hard to make a change I think, for a constant, + but it depends on the maintainers + < braunr> it's not that simple + < braunr> time related things can easily overflow in the future + < nalaginrut> alright + < braunr> refer to the 2038 end-of-the-world bug + < nalaginrut> so how can I describe the warning/suggestion to them? + < braunr> i'm not sure + < braunr> tell them the TIME_UNITS_PER_SECOND isn't appropriate for larger + values of clk_tck + < braunr> dammit, microseconds are hardcoded everywhere in + sysdeps/mach/hurd ... 
>( + < braunr> nalaginrut: my new patch seems to fix the problem + < braunr> nalaginrut: i've built debian packages with which you can + directly test + < braunr> nalaginrut: deb http://ftp.sceen.net/debian-hurd-i386 + experimental/ + < braunr> Totals for this test run: + < braunr> passes: 38605 + < braunr> failures: 0 + < braunr> unexpected passes: 0 + < braunr> expected failures: 7 + < braunr> unresolved test cases: 578 + < braunr> untested test cases: 1 + < braunr> unsupported test cases: 10 + < braunr> errors: 0 + < braunr> PASS: check-guile + < braunr> ============= + < braunr> 1 test passed + < braunr> ============= + < braunr> :) + < braunr> youpi: the branch i added to glibc contains a working patch for + clock_t in centiseconds + < youpi> k + + +### IRC, freenode, #hurd, 2013-08-28 + + braunr: well, looks great! I'll try it soon~ + braunr: BTW, where is the patch/ + braunr: what was needed to get guile working on the hurd? + well, if the fix wasn't to guile, I don't need the details. + 04:53 < nalaginrut> braunr: BTW, where is the patch/ + there is hardly anyone here at 5am + nalaginrut: + http://git.savannah.gnu.org/cgit/hurd/glibc.git/log/?h=rbraun/clock_t_centiseconds + braunr: thanks for that, but why not use a constant for 100? + nalaginrut: i don't know where to define it + it's glibc, you don't define new stuff mindlessly + braunr: about your centiseconds patch, did you run the libc + testsuite with it? + it does seem a shame to reduce the resolution of the timers + from microseconds to centiseconds. I wonder if that could be avoided. + by fixing all applications which assume centiseconds + *nod* well, if there's such a problem in Guile, I'd be glad + to fix that. + youpi: no + I see that there's a macro CLOCKS_PER_SEC that programs + should consult. + braunr: ok, I'll do then + mark_weaver: why is it a shame ? + it's not clock or timer resolution + it's clock_t resolution + it's an obsolete api to measure average cpu usage + having such a big value on the other hand reduces the cpu usage + durations + braunr: good point :) I confess to being mostly ignorant of + these APIs. + Though Guile should still consult CLOCKS_PER_SEC instead of + assuming centiseconds. If it's making an improper assumption, I'd like + to know so I can fix it. + the improper assumption is that there are less than 1000 clock + ticks per second + do you know off-hand of some code in Guile that is making + improper assumptions? + yes + let me find it + thanks + factor = scm_quotient (scm_from_long (TIME_UNITS_PER_SECOND), + scm_from_long (ticks_per_second)); + it seems guile attempts to normalize all times values to + TIME_UNITS_PER_SECOND + while i think it would be better off using ticks_per_second (clock + ticks as provided by sysconf()) + attempting to normalize here causes factor to become 0 if + TIME_UNITS_PER_SECOND < ticks_per_second + ah, I see. + I'll take care of it. thanks for the pointer! 
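+
+The portability issue pointed out above boils down to the precomputed
+quotient: with `TIME_UNITS_PER_SECOND` at 1000 (Guile's internal unit, per
+the discussion) and a clock tick of 1000000, the factor truncates to 0.  A
+normalization that avoids the truncating quotient -- multiply first, divide
+last, in 64-bit arithmetic -- might look like the sketch below; it only
+illustrates the idea and is not the actual fix applied to Guile.
+
+    /* Convert a clock_t-style tick count into internal time units
+       without precomputing a factor, so nothing truncates to zero even
+       when ticks_per_second (e.g. 1000000 on an unpatched Hurd libc)
+       exceeds the number of internal units per second.  */
+    #include <stdint.h>
+
+    #define TIME_UNITS_PER_SECOND 1000  /* value taken from the discussion */
+
+    static int64_t
+    ticks_to_time_units (int64_t ticks, long ticks_per_second)
+    {
+      return ticks * TIME_UNITS_PER_SECOND / ticks_per_second;
+    }
+
+    /* e.g. ticks_to_time_units (buf.tms_utime, sysconf (_SC_CLK_TCK)) */
+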
+ braunr: I've commited the centisecond patch to debian's glibc + + +### IRC, freenode, #hurd, 2013-08-29 + + braunr: Guile2 works smoothly now, let me try something cool + with it + nalaginrut: nice diff --git a/open_issues/tmux.mdwn b/open_issues/tmux.mdwn new file mode 100644 index 00000000..f71d13e1 --- /dev/null +++ b/open_issues/tmux.mdwn @@ -0,0 +1,24 @@ +[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_porting]] + +# IRC, freenode, #hurd, 2013-08-01 + + teythoon: can you stop tmux on darnassus please ? + i'd like to check something + done + tmux makes load average grow to 5 without any visible activity :/ + can't reproduce it with my instances though + anyway, that's minor + I used tmux before and never encountered that + sometimes tmux would hang on attaching or detaching though, but + overall I had less problems with tmux than with screen + ah, I tried to start tmux on darnassus and now it hangs diff --git a/open_issues/virtualization/fakeroot.mdwn b/open_issues/virtualization/fakeroot.mdwn index f4739776..f9dd4756 100644 --- a/open_issues/virtualization/fakeroot.mdwn +++ b/open_issues/virtualization/fakeroot.mdwn @@ -22,3 +22,46 @@ License|/fdl]]."]]"""]] btw, I believe our fakeroot-hurd is close to working actually it's just a argv[0] issue supposed to be fixed by exec_file_name but apparently not fixed in that case, for some reason + + +## IRC, freenode, #hurd, 2013-08-26 + + < teythoon> also I looked into the fakeroot issue, aiui the problem is that + scripts are not handled correctly, right? + < teythoon> the exec server fails to locate the scripts file name, and so + it hands the file_t to the interpreter process and passes /dev/fds/3 as + script name + < teythoon> afaics that breaks e.g. python + < youpi> yes + < youpi> pinotree's exec_file_name is supposed to fix that, but for some + reason it doesn't work here + < pinotree> it was pochu's, not mine + < youpi> ah, right + < teythoon> ah I see, I was wondering about that + < pochu> it was working for a long time, wasn't it? + < pochu> and only stopped working recently + < youpi> did it completely stop? + < youpi> I have indeed seen odd issues + < youpi> I haven't actually checked whether it has completely stopped + working + < youpi> probably worth looking there first + < pinotree> gtk+3.0 fails, but other stuff like glib2.0 and gtester-using + stuff works + < teythoon> huh? I created tests like "#!/bin/sh\necho $0" and that says + /dev/fd..., and a python script doing the same doesn't even run, so how + can it work for a package build? + < youpi> it works for me in plain bash + < youpi> #!/bin/sh + < youpi> echo $0 + < youpi> € $PWD/test.sh + < youpi> /home/samy/test.sh + < teythoon> it does !? 
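+
+A quick way to see exactly what the exec server hands to a script's
+interpreter is a stand-in "interpreter" that merely echoes its arguments,
+as sketched below.  The install path in the shebang line is hypothetical;
+whether `argv[1]` arrives as the real script path or as `/dev/fd/N` under
+fakeroot-hurd is precisely the question debated here.
+
+    /* showargs.c -- print every argument as received.  Point a test
+       script's shebang line at the compiled binary, for example
+       "#!/usr/local/bin/showargs" (path hypothetical), and run the
+       script with and without fakeroot-hurd to compare what arrives.  */
+    #include <stdio.h>
+
+    int
+    main (int argc, char *argv[])
+    {
+      int i;
+
+      for (i = 0; i < argc; i++)
+        printf ("argv[%d] = %s\n", i, argv[i]);
+      return 0;
+    }
+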
+ < youpi> yes + < youpi> not in fakeroot-hurd however, as we said + < teythoon> well, obviously it works when not being run under + fakeroot-hurd, yes + < youpi> ok, so we weren't talking about the same thing + < youpi> a mere shell script doesn't work in fakeroot-hurd indeed + < youpi> that's why we still use fakeroot-sysv + < teythoon> right + < youpi> err, -tcp diff --git a/open_issues/virtualization/networking.mdwn b/open_issues/virtualization/networking.mdwn index 7a6474a1..f8bda063 100644 --- a/open_issues/virtualization/networking.mdwn +++ b/open_issues/virtualization/networking.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -28,3 +28,73 @@ Collection about stuff that is relevant for *virtualization* and *networking*. [[hurd/translator/pfinet]] by setting environment variables. Project is now part of [[Virtual_Square_View-OS]]. + + +# OpenVPN + +[[community/meetings/GHM2013]]. + + +## IRC, freenode, #hurd, 2013-08-23 + + good news + with a couple small patches, openvpn does work as joe user + + +## IRC, freenode, #hurd, 2013-08-30 + + it's really cool that openvpn ended up working completely the day + before :) + + +## IRC, freenode, #hurd, 2013-09-03 + + <_d3f> Hey guys, how did you get openvpn working on the Hurd? just curious + as I saw it in the GHM video + <_d3f> no one here who has a clue how to get *vpn working on the Hurd? + _d3f: youpi did it + i don't know the details + <_d3f> okay, I will question him when I see him around, thx. Do you know if + it was a lot of work to get the tun device working? Because I would like + to use tinc on the Hurd. + _d3f: a bit but not that much either + <_d3f> braunr: well, okay. Do you know if the source of his 'port' is + online, I haven't found it :/ + it should be soon + + +## IRC, freenode, #hurd, 2013-09-04 + + <_d3f> youpi: you are the guy who has brought openvpn to the hurd, right? I + would like to know how you got the tun/tap thing working as I would like + to use tinc on it. :) + _d3f: essentially no modification of openvpn iirc + just tell it to open the tun node created by pfinet + and read/write it + i.e. the existing generic code in place in openvpn + <_d3f> I will have a look at it, somekind tinc builds with the linux + specific device.c but I wasn't able to exchange keys. I will have a look + at the device handling again and try to get the pfinet tun node used. + + +## IRC, freenode, #hurd, 2013-09-07 + + anyone here knows how /dev/net is handled on the hurd? Programs using + it say it's not a directory. I tried creating one and setting a netdde + translator for a tun device in it, but this may be wrong as it doesn't + work + d3f: what does /dev/net do? + ah, its tun/tap stuff... + on my gnu/linux it includes a tun device + right + I am still reading about the Hurd and try to understand /hurd/netdde + and devnode but by now I am quite sure I will need those to set a tun + networktranslator on /dev/net/tun? + hm, I don't think netdde or devnode will be of any help + afaiui devnode makes mach devices available in the hurdish way, + i.e. available for lookup in the filesystem + d3f: ping youpi if he shows up, he hacked up openvpn to work on + the hurd + yeah I know, I talked to him as I am tring to get tinc working on the + Hurd (tinc builds by now). 
I will give him a shot about creating the + "tun" device diff --git a/public_hurd_boxen.mdwn b/public_hurd_boxen.mdwn index 36e04ab1..4c7b3e24 100644 --- a/public_hurd_boxen.mdwn +++ b/public_hurd_boxen.mdwn @@ -29,7 +29,7 @@ image|hurd/running/qemu]]. "[[bddebian]]","goober","Debian GNU/Hurd","?" "[[bddebian]]","grubber","Debian GNU/Hurd","Celeron 2.2 GHz; 554 MiB","Xen domU on [[zenhost]]; for experimental stuff" "[[bddebian]]","[[zenhost]]","Debian GNU/Linux","Celeron 2.2 GHz","Xen dom0 for several hosts" -"[[sceen]]","darnassus","Debian GNU/Hurd","Core i5 3.1 GHz, 1.8 GiB","KVM guest on shattrath; public Hurd box; web server" +"[[sceen]]","darnassus","Debian GNU/Hurd","Core i5 3.1 GHz, 1.8 GiB","KVM guest on shattrath; public Hurd box; [web server](http://darnassus.sceen.net/)" "[[sceen]]","ironforge","Debian GNU/Hurd","Core i5 3.1 GHz, 1.8 GiB","KVM guest on shattrath; Debian buildd" "[[sceen]]","exodar","Debian GNU/Hurd","Core i5 3.1 GHz, 1.8 GiB","KVM guest on shattrath; Debian porterbox, all Debian Developers have access" "[[sceen]]","shattrath","Debian GNU/Linux","Core i5 3.1 GHz","KVM host" diff --git a/public_hurd_boxen/sceen.mdwn b/public_hurd_boxen/sceen.mdwn index 25416857..b9188ffe 100644 --- a/public_hurd_boxen/sceen.mdwn +++ b/public_hurd_boxen/sceen.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -9,3 +9,11 @@ is included in the section entitled [[GNU Free Documentation License|/fdl]]."]]"""]] + + +# IRC, freenode, #hurd, 2013-08-21 + + i made all sceen.net VMs use hugetlbfs for their physical memory + i suspect a system like the hurd, with such a huge working set for + just about every action compared to other systems, should visibly benefit + from that -- cgit v1.2.3
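+
+Coming back to the OpenVPN/tinc discussion above: the approach youpi
+describes amounts to opening the tun node set up for pfinet and
+reading/writing it, roughly as in the sketch below -- no Linux-style
+`TUNSETIFF` ioctl involved.  The node name `/dev/tun0` is a placeholder for
+whatever node was actually configured, and the assumption that each
+`read()` yields one raw IP packet is not confirmed above.
+
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    int
+    main (void)
+    {
+      char buf[2048];
+      int fd = open ("/dev/tun0", O_RDWR);  /* hypothetical node name */
+      ssize_t n;
+
+      if (fd < 0)
+        {
+          perror ("open");
+          return 1;
+        }
+      for (;;)
+        {
+          n = read (fd, buf, sizeof buf);   /* inbound packet */
+          if (n <= 0)
+            break;
+          printf ("read a %zd byte packet\n", n);
+          /* A VPN daemon would encrypt buf and send it to its peer
+             here; packets received from the peer would be write()n
+             back to fd.  */
+        }
+      close (fd);
+      return 0;
+    }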