-rw-r--r--  capability.mdwn | 20
-rw-r--r--  community/gsoc/2013/hacklu.mdwn | 2099
-rw-r--r--  community/gsoc/2013/nlightnfotis.mdwn | 3037
-rw-r--r--  community/gsoc/project_ideas/download_backends.mdwn | 11
-rw-r--r--  community/gsoc/project_ideas/mtab/discussion.mdwn | 2072
-rw-r--r--  community/gsoc/project_ideas/object_lookups.mdwn | 29
-rw-r--r--  community/gsoc/project_ideas/sound/discussion.mdwn | 47
-rw-r--r--  contributing.mdwn | 3
-rw-r--r--  contributing/discussion.mdwn | 68
-rw-r--r--  contributing/web_pages/news/qoth_next.mdwn | 11
-rw-r--r--  faq/sata_disk_drives/discussion.mdwn | 234
-rw-r--r--  faq/still_useful.mdwn | 2
-rw-r--r--  faq/system_port.mdwn | 24
-rw-r--r--  glibc/signal/signal_thread.mdwn | 45
-rw-r--r--  hurd.mdwn | 1
-rw-r--r--  hurd/coding_style.mdwn | 59
-rw-r--r--  hurd/console/discussion.mdwn | 10
-rw-r--r--  hurd/debugging.mdwn | 27
-rw-r--r--  hurd/debugging/rpctrace.mdwn | 43
-rw-r--r--  hurd/libfuse.mdwn | 20
-rw-r--r--  hurd/libstore.mdwn | 37
-rw-r--r--  hurd/libstore/part.mdwn | 133
-rw-r--r--  hurd/running/debian/dhcp.mdwn | 97
-rw-r--r--  hurd/subhurd.mdwn | 375
-rw-r--r--  hurd/subhurd/discussion.mdwn | 10
-rw-r--r--  hurd/translator.mdwn | 7
-rw-r--r--  hurd/translator/auth.mdwn | 13
-rw-r--r--  hurd/translator/eth-filter.mdwn | 37
-rw-r--r--  hurd/translator/examples.mdwn | 8
-rw-r--r--  hurd/translator/exec.mdwn | 8
-rw-r--r--  hurd/translator/ext2fs.mdwn | 63
-rw-r--r--  hurd/translator/fifo.mdwn | 48
-rw-r--r--  hurd/translator/firmlink.mdwn | 14
-rw-r--r--  hurd/translator/hostmux.mdwn | 15
-rw-r--r--  hurd/translator/httpfs.mdwn | 100
-rw-r--r--  hurd/translator/netio.mdwn | 7
-rw-r--r--  hurd/translator/nsmux.mdwn | 27
-rw-r--r--  hurd/translator/pfinet.mdwn | 9
-rw-r--r--  hurd/translator/pfinet/implementation.mdwn | 167
-rw-r--r--  hurd/translator/pflocal.mdwn | 28
-rw-r--r--  hurd/translator/proc.mdwn | 75
-rw-r--r--  hurd/translator/procfs/jkoenig/discussion.mdwn | 177
-rw-r--r--  hurd/translator/socketio.mdwn | 27
-rw-r--r--  hurd/translator/tmpfs/tmpfs_vs_defpager.mdwn | 129
-rw-r--r--  hurd/translator/ufs.mdwn | 38
-rw-r--r--  libpthread.mdwn | 6
-rw-r--r--  microkernel/discussion.mdwn | 40
-rw-r--r--  microkernel/l4.mdwn | 12
-rw-r--r--  microkernel/mach/concepts.mdwn | 17
-rw-r--r--  microkernel/mach/deficiencies.mdwn | 1621
-rw-r--r--  microkernel/mach/documentation.mdwn | 13
-rw-r--r--  microkernel/mach/gnumach/debugging.mdwn | 5
-rw-r--r--  microkernel/mach/gnumach/hardware_compatibility_list.mdwn | 5
-rw-r--r--  microkernel/mach/gnumach/interface/syscall/mach_print.mdwn | 29
-rw-r--r--  microkernel/mach/gnumach/memory_management.mdwn | 15
-rw-r--r--  microkernel/mach/gnumach/ports.mdwn | 7
-rw-r--r--  microkernel/mach/history.mdwn | 134
-rw-r--r--  microkernel/mach/message/msgh_id.mdwn | 24
-rw-r--r--  microkernel/mach/mig.mdwn | 7
-rw-r--r--  microkernel/mach/mig/documentation.mdwn | 21
-rw-r--r--  news/2008-09-11.mdwn | 6
-rw-r--r--  open_issues/64-bit_port.mdwn | 20
-rw-r--r--  open_issues/anatomy_of_a_hurd_system.mdwn | 425
-rw-r--r--  open_issues/arm_port.mdwn | 52
-rw-r--r--  open_issues/binutils.mdwn | 36
-rw-r--r--  open_issues/boehm_gc.mdwn | 92
-rw-r--r--  open_issues/clock_gettime.mdwn | 10
-rw-r--r--  open_issues/cloud.mdwn | 49
-rw-r--r--  open_issues/code_analysis.mdwn | 14
-rw-r--r--  open_issues/crash_server.mdwn | 61
-rw-r--r--  open_issues/dbus.mdwn | 175
-rw-r--r--  open_issues/dde.mdwn | 188
-rw-r--r--  open_issues/device_drivers_and_io_systems.mdwn | 6
-rw-r--r--  open_issues/exec.mdwn | 49
-rw-r--r--  open_issues/exec_leak.mdwn | 57
-rw-r--r--  open_issues/exec_memory_leaks.mdwn | 54
-rw-r--r--  open_issues/fakeroot_eagain.mdwn | 4
-rw-r--r--  open_issues/gcc.mdwn | 698
-rw-r--r--  open_issues/gccgo.mdwn | 9
-rw-r--r--  open_issues/gdb.mdwn | 263
-rw-r--r--  open_issues/gdb_gcore.mdwn | 6
-rw-r--r--  open_issues/gdb_signal_handler.mdwn | 403
-rw-r--r--  open_issues/glibc.mdwn | 644
-rw-r--r--  open_issues/glibc/0.4.mdwn | 4
-rw-r--r--  open_issues/glibc/debian.mdwn | 106
-rw-r--r--  open_issues/glibc/debian/experimental.mdwn | 60
-rw-r--r--  open_issues/glibc/t/tls-threadvar.mdwn | 52
-rw-r--r--  open_issues/glibc/t/tls.mdwn | 6
-rw-r--r--  open_issues/gnumach_integer_overflow.mdwn | 35
-rw-r--r--  open_issues/gnumach_vm_object_resident_page_count.mdwn | 28
-rw-r--r--  open_issues/hurd_init.mdwn | 216
-rw-r--r--  open_issues/libc_variant_selection.mdwn | 25
-rw-r--r--  open_issues/libmachuser_libhurduser_rpc_stubs.mdwn | 26
-rw-r--r--  open_issues/libnetfs_passive_translators.mdwn | 55
-rw-r--r--  open_issues/libnetfs_vs_libdiskfs.mdwn | 118
-rw-r--r--  open_issues/libpthread.mdwn | 199
-rw-r--r--  open_issues/libpthread/t/fix_have_kernel_resources.mdwn | 398
-rw-r--r--  open_issues/libpthread_assertion_thread_prevp.mdwn | 20
-rw-r--r--  open_issues/libpthread_dlopen.mdwn | 16
-rw-r--r--  open_issues/llvm.mdwn | 33
-rw-r--r--  open_issues/mach_migrating_threads.mdwn | 88
-rw-r--r--  open_issues/magic_translator_machtype.mdwn | 3
-rw-r--r--  open_issues/memory_object_model_vs_block-level_cache.mdwn | 243
-rw-r--r--  open_issues/mig_portable_rpc_declarations.mdwn | 113
-rw-r--r--  open_issues/mig_stub_functions.mdwn | 41
-rw-r--r--  open_issues/mondriaan_memory_protection.mdwn | 85
-rw-r--r--  open_issues/nightly_builds.mdwn | 7
-rw-r--r--  open_issues/nptl.mdwn | 22
-rw-r--r--  open_issues/open_symlink.mdwn | 14
-rw-r--r--  open_issues/profiling.mdwn | 105
-rw-r--r--  open_issues/pthread_atfork.mdwn | 13
-rw-r--r--  open_issues/resource_management_problems.mdwn | 51
-rw-r--r--  open_issues/robustness.mdwn | 44
-rw-r--r--  open_issues/secure_file_descriptor_handling.mdwn | 8
-rw-r--r--  open_issues/sendmsg_scm_creds.mdwn | 77
-rw-r--r--  open_issues/some_todo_list.mdwn | 15
-rw-r--r--  open_issues/systemd.mdwn | 933
-rw-r--r--  open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.mdwn | 33
-rw-r--r--  open_issues/time.mdwn | 762
-rw-r--r--  open_issues/tmux.mdwn | 24
-rw-r--r--  open_issues/translate_fd_or_port_to_file_name.mdwn | 57
-rw-r--r--  open_issues/translator_stdout_stderr.mdwn | 87
-rw-r--r--  open_issues/user-space_device_drivers.mdwn | 97
-rw-r--r--  open_issues/virtualization/fakeroot.mdwn | 43
-rw-r--r--  open_issues/virtualization/networking.mdwn | 72
-rw-r--r--  public_hurd_boxen.mdwn | 2
-rw-r--r--  public_hurd_boxen/sceen.mdwn | 10
-rw-r--r--  system_call.mdwn | 15
m---------  toolchain/logs | 12
129 files changed, 18887 insertions, 374 deletions
diff --git a/capability.mdwn b/capability.mdwn
index 7219cdce..0ebe5cd4 100644
--- a/capability.mdwn
+++ b/capability.mdwn
@@ -1,5 +1,5 @@
-[[!meta copyright="Copyright © 2007, 2008, 2010, 2011 Free Software Foundation,
-Inc."]]
+[[!meta copyright="Copyright © 2007, 2008, 2010, 2011, 2013 Free Software
+Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -77,6 +77,22 @@ port|microkernel/mach/port]]. As in UNIX (see above), they are not
[[persistent|persistency]].
+## IRC, freenode, #hurd, 2013-07-01
+
+ <nlightnfotis> I have read plenty of documents, and wrapped my head around
+ most Hurd concepts, but I still have not understood well what
+ capabilities are.
+ <youpi> Mmm, which capabilities?
+ <youpi> AIUI, the Hurd doesn't really have a notion of capabilites, just a
+ notion of owning a port right
+ <nlightnfotis> From what I have understood (from the critique) they
+ reference ports so they objects can be referenced via them
+ <youpi> (which provides processes a way for doing things)
+ <youpi> ok, so we are talking about the same thing, I guess
+ <nlightnfotis> ahh, that's cool. I thought there was more to the story that
+ I couldn't understand
+
+
# Further Reading
* [[Mach port|microkernel/mach/port]]
diff --git a/community/gsoc/2013/hacklu.mdwn b/community/gsoc/2013/hacklu.mdwn
new file mode 100644
index 00000000..b7de141b
--- /dev/null
+++ b/community/gsoc/2013/hacklu.mdwn
@@ -0,0 +1,2099 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!toc]]
+
+
+# IRC, freenode, #hurd, 2013-06-23
+
+ <hacklu> braunr: sorry for the late reply. Honestly to say, the school
+ works had taken most of my time these days. I haven't got any
+ siginificant progress now. I am trying to write a little debugger demo on
+ Hurd.
+ <hacklu> braunr: things goes more hard than I think, these are some
+ differences between ptrace() on Hurd and Linux. I am trying to solve
+ this.
+
+
+# IRC, freenode, #hurd, 2013-06-24
+
+ <hacklu> this is my weekly report
+ http://hacklu.com/blog/gsoc-weekly-report1-117/.
+ <hacklu> and I have two main questions when I read the gdb source code.
+ <hacklu> 1/What is the S_exception_raise_request()? 2/what is the role of
+ ptrace in gdb port on Hurd?
+ <youpi> hacklu: where did you see S_exception_raise_request?
+ <hacklu> in gdb/gnu-nat.c
+ <youpi> ah, in gdb
+ <hacklu> yeah. and I have read the <The hurd hacking guide>. is says the S_
+ start means server stub.
+ <youpi> yes
+ <youpi> what happens is that gnu_wait keeps calling mach_msg
+ <youpi> to get a message
+ <youpi> then it passes that message to the various stubs servers
+ <youpi> see just below, it calls exc_server, among others
+ <youpi> and that's exc_server which ends up calling
+ S_exception_raise_request, if the message is an exception_raise request
+ <youpi> exc_server is a mere multiplexer, actually
+ <tschwinge> S_exception_raise_request is the implementation of the request
+ part (so one half of a typical RPC) of the Mach exception interface.
+ <tschwinge> See gdb/exc_request.defs in GDB and include/mach/exc.defs in
+ Mach.
+ <hacklu> youpi: how gnu_wait pass one message to exc_server? in which
+ function?
+ <youpi> in gnu_wait()
+ <youpi> && !exc_server (&msg.hdr, &reply.hdr)
+ <hacklu> oh, I see this.
+ <hacklu> firstly I think it is a type check simply.
+ <youpi> see the comment: "handle what we got"
+ <tschwinge> The Hurd's proc server also is involved in the exception
+ passing protocol (see its source code).
+ <hacklu> tschwinge: I will check the source code later. is the exception
+ take place in this way: 1. the inferior call ptrace(TRACE_ME). 2.the gdb
+ call task_set_exception_port. 3. mach send a notification to the
+ exception port set before. 4. gdb take some action.
+ <tschwinge> hacklu: Yes, that's it, roughly. The idea is that GDB replaces
+ a process' standard exception port, and replaces it "with itself", so
+ that when the process that is debugged receives and exception (by Mach
+ sending a exception_raise RPC), GDB can then catch that and act
+ accordingly.
+ <tschwinge> hacklu: As for your other questions, about ptrace: As you can
+ see in [glibc]/sysdeps/mach/hurd/ptrace.c, ptrace on Hurd is simply a
+ wrapper around vm_read/write and more interfaces.
+ <tschwinge> hacklu: As the GDB port for Hurd is specific to Hurd by
+ definition, you can also directly use these calls in GDB for Hurd.
+ <tschwinge> ..., as it is currently done.
+    <hacklu> and in detail, the part 3 mach send a notification to the
+      exception port is like this: gnu_wait get the message in mach_msg, and
+      then pass it to exc_server by exc_server(), then exc_server call
+      S_exception_raise_request()? ?
+ <hacklu> tschwinge: yeah, I have see the ptrace.c. I was wonder about
+ nobody use ptrace in Hurd except TRACEME...
+ <tschwinge> hacklu: Right about »and in detail, [...]«.
+ <tschwinge> hacklu: It would be very good (and required for your
+ understanding anyway), if you could write up a list of things that
+ happens when a process (both under the control of GDB as well as without
+ GDB) is sent an exception (due to a breakpoint instruction, for example).
+ <tschwinge> Let me look something up.
+ <hacklu> tschwinge: what's the function of exc_server? if I can get the
+ notification in mach_msg().
+ <youpi> to multiplex the message
+ <youpi> i.e. decoding it, etc. up to calling the S_ function with the
+ proper parameters
+ <youpi> exc_server being automatically generated, that saves a lot of code
+ <tschwinge> That is generated by MIG from the gdb/exc_request.defs file.
+ <tschwinge> You'll find the generated file in the GDB build directory.
+ <hacklu> I have wrote down the filenames. after this I will check that.
+ <tschwinge> hacklu: I suggest you also have a look at the Mach 3 Kernel
+ Principles book,
+ <http://www.gnu.org/software/hurd/microkernel/mach/documentation.html>.
+ <tschwinge> This also has some explanation of the thread/task's exception
+ mechanism.
+ <tschwinge> And of course, explains the RPC mechanism, which the exception
+ mechanism is built upon.
+ <tschwinge> And then, really make a step-by-step list of what happens; this
+ should help to better visualize what's going on.
+ <hacklu> ok. later I will update this list on my blog.
+ <tschwinge> hacklu: I cannot tell off-hand why GDB on Hurd is using
+ ptrace(PTRACE_TRACEME) instead of doing these calls manually. I will
+ have to look that up, too.
+ <hacklu> tschwinge: thanks.
+ <tschwinge> hacklu: Anyway -- you're asking sensible questions, so it seems
+ you're making progress/are on track. :-)
+ <hacklu> tschwinge: there is something harder than I had thought, I haven't
+ got any meaningful progress. sorry for this.
+ <tschwinge> hacklu: That is fine, and was expected. :-) (Also, you're
+ still busy with university.)
+ <hacklu> I will show more time and enthusiasm on this.
+ <tschwinge> hacklu: Oh, and one thing that may be confusing: as you may
+ have noticed, the names of the same RPC functions are sometimes slightly
+ different if different *.defs files. What is important is the subsystem
+ number, such as 2400 in [GDB]/gdb/exc_request.defs (and then incremented
+ per each routine/simpleroutine/skip directive).
+ <tschwinge> hacklu: Just for completeness, [hurd]/hurd/subsystems has a
+ list of RPC subsystems we're using.
+ <tschwinge> And the name given to routine 2400, for example, is just a
+ "friendly name" that is then used locally in the code where the *.defs
+ file has been processed by MIG.
+ <tschwinge> What a clumsy explanation of mine. But you'll get the idea, I
+ think. ;-)
+ <tschwinge> hacklu: And don't worry about your progress -- you're making a
+ lot of progress already (even if it doesn't look like it, because you're
+ not writing code), but the time spent on understanding these complex
+ issues (such as the RPC mechanism) definitely counts as progress, too.
+ <hacklu> tschwinge: not clearly to got it as I am not sensitive with the
+ MIG's grammer. But I know, the exc is the routine 2400's alias name?
+ <tschwinge> hacklu: I'd like to have you spend enough time to understand
+ these fundamental concepts now, and then switch to "hacking mode" (write
+ code) later, instead of now writing code but not understanding the
+ concepts behind it.
+ <hacklu> I have wrote a bit code to validate my understanding when I read
+ the soruce code. But the code not run. http://pastebin.com/r3wC5hUp
+ <tschwinge> The subsystem directive [...]. As well, let me just point you
+ to the documentation:
+ <http://www.gnu.org/software/hurd/microkernel/mach/mig/documentation.html>,
+ MIG - THE MACH INTERFACE GENERATOR, chapter 2.1 Subsystem identification.
+ <tschwinge> hacklu: Yes, writing such code for testing also is a good
+ approach. I will have to look at that in more detail, too.
+ * tschwinge guesses hacklu is probably laughing when seeing the years these
+ documents were written in (1989, etc.). ;-)
+ <hacklu> mach_msg make no sense in my code, and the process just hang. kill
+ -9 can't stop is either.
+ <braunr> hacklu: do you understand why kill -KILL might not work now ?
+ <hacklu> braunr: no, but I found I can use gdb to attach to that process,
+ then quit in gdb, the process quit too.
+ <hacklu> maybe that process was waiting a resume.
+ <braunr> something like that yes
+ <braunr> iirc it's related to a small design choice in the proc server
+ <braunr> something that put processes in an uninterruptible state when
+ being debugged
+ <hacklu> iirc ?
+ <braunr> if i recall cl=orrectly
+ <braunr> correctly*
+ <hacklu> like D status in linux?
+ <braunr> or T
+ <braunr> there has been a lot of improvements regarding signal handling in
+ linux over time so it's not really comparable now
+ <braunr> but that's the idea
+ <hacklu> in ps, i see the process STAT is THumx
+ <braunr> did you see that every process on the hurd has at least two
+ threads ?
+ <hacklu> no, but I have see that in hurd, the exception handler can't live
+ in the same context with the victim. so there must be at least two
+ threads. I think
+ <braunr> hacklu: yes
+ <braunr> that thread also handles regular signals
+ <braunr> in addition to mach exceptions
+ <braunr> (there are two levels of multiplexing in servers, first locating
+ the subsystem, then the server function)
+ <braunr> hacklu: if what i wrote is confusing, don't hesitate to ask for
+ clarifications (i really don't intend to make things confusing)
+ <hacklu> braunr: I don't know what you say about the "multiplexing in
+ servers". For instance, is it means how to pass message from mach_msg to
+ exc_server in gnu_wait()?
+ <braunr> hacklu: i said that the "message thread" handles both mach
+ exceptions and unix signals
+ <braunr> hacklu: these are two different interfaces (and in mach terms,
+ subsystems)
+ <braunr> hacklu: see hurd/msg.defs for the msg interface (which handles
+ unix signals)
+ <braunr> hacklu: to handle multiple interfaces in the same thread, servers
+ need to first find the right subsystem
+ <braunr> this is done by subsequently calling all demux functions until one
+ returns true
+ <braunr> (finding the right server function is done by these demux
+ functions)
+ <braunr> hacklu: see hurd/msgportdemux.c in glibc to see how it's done
+ there
+ <braunr> it's short actually, i'll past it here :
+ <braunr> return (_S_exc_server (inp, outp) ||
+ <braunr> _S_msg_server (inp, outp));
+ <braunr> hacklu: did that help ?
+ <hacklu> braunr: a bit more confusing. one "message thread" handles
+ exceptions and signals, means the message thread need to recive message
+ from two port. then pass the message to the right server which handle the
+ message. the server also should pick the right subsystem from a lot of
+ subsystems to handle the msg. is this ?
+ <braunr> the message thread is a server thread
+ <braunr> (which means every normal process is actually also a server,
+ receiving exceptions and signals)
+ <braunr> there may be only two ports, or more, it doesn't matter much, the
+ port set abstraction takes care of that
+ <hacklu> so the message thread directly pass the msg to the right
+ subsystem?
+ <braunr> not directly as you can see
+ <braunr> it tries them all until one is able to handle the incoming message
+ <braunr> i'm not sure it will help you with gdb, but it's important to
+ understand for a better general understanding of the system
+ <braunr> ugly sentence
+ <hacklu> ah, I see. like this in gnu-nat.c if(!notify_server(&msg.hdr,
+ &reply.hdr) && !exc_server(&msg.hdr...)
+ <braunr> yes
+ <hacklu> the thread just ask one by one.
+ <braunr> be careful about the wording
+ <braunr> the thread doesn't "send requests"
+ <braunr> it runs functions
+ <braunr> (one might be tempted to think there are other worker threads
+ waiting for a "main thread" to handle demultiplexing messages)
+ <hacklu> I got it.
+ <hacklu> the notify_server function is just run in the same context in
+ "message thread",and there is no RPC here.
+ <braunr> yes
+    <hacklu> and the notify_server code is generated by mig automatically.
+ <braunr> yes
+
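+The path described above (a mach_msg receive loop handing every message to
+the MIG-generated exc_server, which decodes it and calls
+catch_exception_raise) can be illustrated with a minimal listener.  This is
+only a sketch of the mechanism, not the code GDB uses: it assumes an
+exc_server stub generated from exc.defs is linked in, and error handling is
+omitted.
+
+    #include <mach.h>
+    #include <mach/task_special_ports.h>
+    #include <stdio.h>
+
+    /* MIG-generated demultiplexer for the exc subsystem (from exc.defs).  */
+    extern boolean_t exc_server (mach_msg_header_t *in, mach_msg_header_t *out);
+
+    /* Called by exc_server once it has decoded an exception_raise request.  */
+    kern_return_t
+    catch_exception_raise (mach_port_t exception_port, mach_port_t thread,
+                           mach_port_t task, int exception, int code, int subcode)
+    {
+      printf ("thread %u raised exception %d (code %d)\n",
+              (unsigned int) thread, exception, code);
+      return KERN_SUCCESS;	/* reply "handled", so the thread may continue */
+    }
+
+    void
+    listen_for_exceptions (task_t inferior)
+    {
+      mach_port_t port;
+      struct { mach_msg_header_t hdr; char data[1024]; } msg, reply;
+
+      mach_port_allocate (mach_task_self (), MACH_PORT_RIGHT_RECEIVE, &port);
+      mach_port_insert_right (mach_task_self (), port, port,
+                              MACH_MSG_TYPE_MAKE_SEND);
+      /* Redirect the inferior's exceptions to us, as GDB does.  */
+      task_set_exception_port (inferior, port);
+
+      for (;;)
+        {
+          mach_msg (&msg.hdr, MACH_RCV_MSG, 0, sizeof msg, port,
+                    MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+          /* Demultiplex: exc_server fills in a reply iff the message
+             belongs to the exc subsystem.  */
+          if (exc_server (&msg.hdr, &reply.hdr))
+            mach_msg (&reply.hdr, MACH_SEND_MSG, reply.hdr.msgh_size, 0,
+                      MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+        }
+    }
+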
+
+# IRC, freenode, #hurd, 2013-06-29
+
+[[!tag open_issue_documentation]]
+
+ <hacklu> I just failed to build the demo on
+ this. http://walfield.org/pub/people/neal/papers/hurd-misc/ipc-hello.c
+ <hacklu> or, example in machsys.doc called simp_ipc.c
+ <pinotree> we don't use cthreads anymore, but pthreads
+ <hacklu> pinotree: em.. and I also failed to find the <servers/env_mgr.h>
+ in example of <A programmer's guide to MACH system call>
+ <pinotree> that i don't know
+ <hacklu> maybe the code in that book out-of-date
+ <teythoon> hacklu: mig and mach ipc documentation is quite dated
+ unfortunately, and so are many examples floating around the net
+
+ <hacklu> btw, I have one more question. when I read <Mach 3 kernel
+ interface>. I find this state: When an exception occurs in a thread, the
+ thread sends an exception message to
+ <hacklu> its exception port, blocking in the kernel waiting for the receipt
+ of a reply. It is
+ <hacklu> assumed that some task is listening to this
+ <hacklu> port, using the exc_serverfunction to decode the messages and
+ then call the
+ <hacklu> linked in catch_exception_raise. It is the job of
+ catch_exception_raiseto handle the exception and decide the course of
+ action for thread.
+ <hacklu> that says, it assumed another task to recieve the msg send to one
+ thread's exception port. why another task?
+ <hacklu> I remmebered, there are at least two threads in one task, one is
+ handle the exception stuffs.
+ <braunr> there are various reasons
+ <braunr> first is, the thread causing the exception is usually not waiting
+ for a message
+ <braunr> next, it probably doesn't have all the info needed to resolve the
+ exception
+ <braunr> (depending on the system design)
+ <braunr> and yes, the second thread in every hurd process is the msg
+ thread, handling both mach exceptions and hurd signals
+ <hacklu> but in this state, I can't find any thing with the so called msg
+ thread
+ <braunr> ?
+ <hacklu> if exist a task to do the work, why we need this thread?
+ <braunr> this thread is the "task"
+ <hacklu> ?
+ <braunr> the msg thread is the thread handling exceptions for the other
+ threads in one task
+ <braunr> wording is important here
+ <braunr> a task is a collection of resources
+ <braunr> so i'm only talking about threads really
+ <braunr> 14:11 < hacklu> assumed that some task is listening to this
+ <braunr> this is wrong
+ <braunr> a task can't listen
+ <braunr> only a thread can
+ <hacklu> in you words, the two thread is in the same task?
+ <braunr> yes
+ <braunr> 14:32 < braunr> and yes, the second thread in every hurd process
+ is the msg thread, handling both mach exceptions and hurd signals
+ <braunr> process == task here
+ <hacklu> yeah, I always think the two thread stay in one task. but I found
+ that state in <mach 3 kernel interface>. so I confuzed
+ <hacklu> s/confuzed/confused
+ <braunr> statement you mean
+ <hacklu> if two thread stay in the same task. and the main thread throw a
+ exception, the other thread to handle it?
+ <braunr> depends on how it's configured
+ <braunr> the thread receiving the exceptions might not be in the same task
+ at all
+ <braunr> on the hurd, only the second thread of a task receives exception
+ <braunr> s
+ <hacklu> I just wonder how can the second thread catch the exception from
+ its containning task
+ <braunr> forget about tasks
+ <braunr> tasks are resource containers
+ <braunr> they don't generate or catch exceptions
+ <braunr> only threads do
+ <braunr> for each thread, there is an exception port
+ <braunr> that is, one receive right, and potentially many send rights
+ <braunr> the kernel uses a send right to send exceptions
+ <braunr> the msg thread waits for messages on the receive right
+ <braunr> that's all
+ <hacklu> ok. if I divide zero in main thread, the kernel will send a msg to
+ the main thread's exception port. and then, the second thread(in the same
+ task) is waiting on that port. so he get the msg. is it right?
+ <braunr> don't focus on main versus msg thread
+ <braunr> it applies to all other threads
+ <braunr> as well
+ <braunr> otherwise, you're right
+ <hacklu> ok, just s/main/first
+ <braunr> no
+ <braunr> main *and* all others except msg
+ <hacklu> main *and* all others except msg ?
+ <braunr> the msg thread gets exception messages for all other threads in
+ its task
+ <braunr> (at least, that's how the hurd configures things)
+ <hacklu> got it.
+ <hacklu> if the msg thread throw exception either, who server for himself?
+ <braunr> i'm not sure but i guess it's simply forbidden
+ <hacklu> i used gdb to attach a little progrom which just contains a divide
+ zero. and I only found the msg thread is in the glibc.
+ <braunr> yes
+ <hacklu> where is the msg thread located in.
+ <braunr> it's created by glibc
+ <hacklu> is it glibc/hurd/catch-exc.c?
+ <braunr> that's the exception handling code, yes
+ <hacklu> there are some differences between the code and the state in <mach
+ 3 system interface>.
+ <braunr> state or statement ?
+ <hacklu> staement
+ <braunr> which one ?
+ <hacklu> http://pastebin.com/ZTBrUAsV
+      When an exception occurs in a thread, the thread sends an exception
+      message to its exception port, blocking in the kernel waiting for the
+      receipt of a reply.  It is assumed that some task is listening (most
+      likely with mach_msg_server) to this port, using the exc_server
+      function to decode the messages and then call the linked in
+      catch_exception_raise.  It is the job of catch_exception_raise to
+      handle the exception and decide the course of action for thread.  The
+      state of the blocked thread can be examined with thread_get_state.
+ <braunr> what difference ?
+ <hacklu> in the code, I can't find things like exc_server,mach_msg_server
+ <braunr> uh
+ <braunr> ok it's a little tangled
+ <braunr> but not that much
+ <braunr> you found the exception handling code, and now you're looking for
+ what calls it
+ <braunr> simple
+ <braunr> see _hurdsig_fault_init
+ <hacklu> from that statemnet I thought there are another _task_ do the
+ exception things for all of the systems thread before you have told me
+ the task means the msg thread.
+ <braunr> again
+ <braunr> 14:47 < braunr> forget about tasks
+ <braunr> 14:47 < braunr> tasks are resource containers
+ <braunr> 14:47 < braunr> they don't generate or catch exceptions
+ <braunr> 14:47 < braunr> only threads do
+ <hacklu> yeah, I think that document need update.
+ <braunr> no
+ <braunr> it's a common misnomer
+ <braunr> once you're used to mach concepts, the statement is obvious
+ <hacklu> braunr: so I need read more :)
+ <hacklu> _hurdsig_fault_init send exceptions for the signal thread to the
+ proc server?
+ <hacklu> why come about _proc_ server?
+ <braunr> no it gives the proc server a send right for signals
+ <braunr> exceptions are a mach thing, signals are a hurd thing
+ <braunr> the important part is
+ <braunr> err = __thread_set_special_port (_hurd_msgport_thread,
+ <braunr> THREAD_EXCEPTION_PORT, sigexc);
+ <hacklu> this one set the exception port?
+ <braunr> yes
+ <braunr> hm wait
+ <braunr> actually no, wrong part :)
+ <braunr> this sets the excpetion port for the msg thread (which i will call
+ the signal thread as mentioned in glibc)
+ <hacklu> but the comment above this line, Direct signal thread exceptions
+ to the proc server means what?
+ <braunr> that the proc server handles exceptions on the signal thread
+ <hacklu> the term signal thread equals the term msg thread?
+ <braunr> yes
+ <hacklu> so, the proc server handles the exceptions throwed by the msg
+ thread?
+ <braunr> looks that way
+ <hacklu> feels a little strange.
+ <braunr> why ?
+ <braunr> this thread isn't supposed to cause exceptions
+ <braunr> if it does, something is deeply wrong, and something must clean
+ that task up
+ <braunr> and the proc server seems to be the most appropriate place from
+ where to do it
+ <hacklu> why need a special server to just work the msg thread? I don't
+ think that thread will throw exception frequentlly
+ <braunr> what does frequency have to do with anything here ?
+ <braunr> ok the appropriate code is _hurdsig_init
+ <braunr> the port for receiving exceptions is _hurd_msgport
+ <braunr> the body of the signal thread is _hurd_msgport_receive
+ <hacklu> aha, in the _hurd_msgport_receive I have finally found the
+ while(1) loop mach_msg_server().
+ <hacklu> so the code is conform with the documents.
+ <hacklu> braunr: [21:18] <braunr> what does frequency have to do with
+ anything here ? yes, I have totally understood your words now. thank you
+ very much.
+ <braunr> :)
+
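+A rough sketch of the structure discussed above: one dedicated thread sitting
+in mach_msg_server on the process' message port and trying each subsystem
+demultiplexer in turn, as glibc's hurd/msgportdemux.c does.  This uses
+pthreads for brevity (glibc sets its signal thread up with lower-level calls)
+and assumes MIG-generated exc_server/msg_server stubs; it is an illustration,
+not glibc's actual code.
+
+    #include <mach.h>
+    #include <pthread.h>
+
+    /* MIG-generated demultiplexers, assumed to be linked in
+       (glibc's are named _S_exc_server and _S_msg_server).  */
+    extern boolean_t exc_server (mach_msg_header_t *in, mach_msg_header_t *out);
+    extern boolean_t msg_server (mach_msg_header_t *in, mach_msg_header_t *out);
+
+    static mach_port_t msgport;	/* receive right served by the "signal thread" */
+
+    /* Try each subsystem until one recognizes the message.  */
+    static boolean_t
+    demux (mach_msg_header_t *in, mach_msg_header_t *out)
+    {
+      return exc_server (in, out) || msg_server (in, out);
+    }
+
+    static void *
+    signal_thread (void *arg)
+    {
+      /* Receive requests and dispatch them through demux, forever.  */
+      for (;;)
+        mach_msg_server (demux, 4096, msgport);
+      return NULL;
+    }
+
+    int
+    main (void)
+    {
+      pthread_t tid;
+      mach_port_allocate (mach_task_self (), MACH_PORT_RIGHT_RECEIVE, &msgport);
+      pthread_create (&tid, NULL, signal_thread, NULL);
+      /* ... the "main" thread goes about its normal work here ...  */
+      pthread_join (tid, NULL);
+      return 0;
+    }
+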
+
+# IRC, freenode, #hurd, 2013-07-01
+
+ <hacklu> hi. this is my weekly
+ report. http://hacklu.com/blog/gsoc-weekly-report2-124/ welcome to any
+ comment
+ <hacklu> teythoon: I only get clear about the rpc stuff. seems a lot behind
+ my plan
+ <youpi> good progress :)
+ <hacklu> I have wrote the details of the exception handle which was asked
+ by tschwing_ last week. Am I all right in my post?
+ <youpi> hacklu: as far as I understand signals, yes :)
+ <hacklu> youpi: thanks for god, I am on the right way finally... :)
+ <hacklu> the mig book says simpleroutine is the one use to implement asyn
+ RPCs which doesn't expect an reply. But I have found a place to pass an
+ reply port to the RPC interface which has been declared as simpleroutine
+ <youpi> hacklu: probably the simpleroutine hardcodes a reply port?
+
+ <youpi> hacklu: about _hurd_internal_post_signal, this is the hairiest part
+ of GNU/Hurd, signal handling
+ <youpi> simply because it's the hairiest part of POSIX :)
+ <youpi> you probably want to just understand that it implements the
+ POSIXity of signal delivering
+ <youpi> i.e. deliver/kill/suspend the process as appropriate
+ <youpi> I don't think you'll need to dive more
+ <hacklu> aha.
+ <hacklu> it will save a lot of time.
+ <hacklu> it seems like the wait_for_inferior() in gdb. which also has too
+ many lines and too many goto
+ <youpi> hacklu: btw, which simpleroutine were you talking about ?
+ <hacklu> I forget where it is, I am finding it now.
+ <youpi> which version of gdb are you looking the source of?
+ <youpi> (in mine, wait_for_inferior is only 45 lines long)
+ <hacklu> I dont know how to pick the verison, I just use the git
+ version. maybe I give a wrong name.
+ <youpi> ok
+ <hacklu> youpi:I remembered, my experience comes from here
+ http://www.aosabook.org/en/gdb.html. (All of this activity is managed by
+ wait_for_inferior. Originally this was a simple loop, waiting for the
+ target to stop and then deciding what to do about it, but as ports to
+ various systems needed special handling, it grew to a thousand lines,
+ with goto statements criss-crossing it for poorly understood
+ <hacklu> reasons.)
+ <hacklu> youpi: the simpleroutine is gdb/gdb/exc_request.defs
+ <youpi> so there is indeed an explicit reply port
+ <hacklu> but simpleroutine is for no-reply use. why use reply port here?
+ <youpi> AIUI, it's simply a way to make the request asynchronous, but still
+ permit an answer
+ <hacklu> ok, I will read the mig book carefully.
+ <braunr> hacklu: as youpi says
+ <braunr> a routine can be broken into two simpleroutines
+ <braunr> that's why some interfaces have interface.defs,
+ interface_request.defs and interface_reply.defs files
+ <braunr> nlightnfotis: in mach terminology, a right *is* a capability
+ <braunr> the only thing mach doesn't easily provide is a way to revoke them
+ individually
+ <nlightnfotis> braunr: Right. And ports are associated with the process
+ server and the kernel right? I mean, from what I have understood, if a
+ process wants to send a signal to another one, it has to do so via the
+ ports to that process held by the process server
+ <nlightnfotis> and it has to establish its identity before doing so, so
+ that it can be checked if it has the right to send to that port.
+ <braunr> yes
+ <nlightnfotis> do process own any ports? or are all their ports associated
+ with the process server?
+ <nlightnfotis> *processes
+ <braunr> mach ports were intended for a lot of different uses
+ <braunr> but in the hurd, they mostly act as object references
+ <braunr> the process owning the receive right (one at most per port)
+ implements the object
+ <braunr> processes owning send rights invoke methods on the object
+ <braunr> use portinfo to find out about the rights in a task
+ <braunr> (process is the unix terminology, task is the mach terminologyà
+ <braunr> )
+ <braunr> i use them almost interchangeably
+ <nlightnfotis> ahh yes, I remember about the last bit. And mach tasks have
+ a 1 to 1 association with user level processes (the ones associated with
+ the process server)
+ <braunr> the proc server is a bit special because it has to know about all
+ processes
+ <braunr> yes
+
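+What braunr suggests with portinfo can also be done directly from a program:
+mach_port_names returns every port name in a task together with the kind of
+rights held on it.  A minimal sketch for the current task, without error
+handling or deallocation of the returned arrays:
+
+    #include <mach.h>
+    #include <stdio.h>
+
+    int
+    main (void)
+    {
+      mach_port_array_t names;
+      mach_port_type_array_t types;
+      mach_msg_type_number_t nnames, ntypes, i;
+
+      mach_port_names (mach_task_self (), &names, &nnames, &types, &ntypes);
+      for (i = 0; i < nnames; i++)
+        printf ("port %u:%s%s%s\n", (unsigned int) names[i],
+                (types[i] & MACH_PORT_TYPE_RECEIVE) ? " receive" : "",
+                (types[i] & MACH_PORT_TYPE_SEND) ? " send" : "",
+                (types[i] & MACH_PORT_TYPE_SEND_ONCE) ? " send-once" : "");
+      return 0;
+    }
+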
+In context of [[open_issues/libpthread/t/fix_have_kernel_resources]]:
+
+ <braunr> hacklu: if you ever find out about either glibc or the proc server
+ creating one receive right for each thread, please let me know
+
+
+# IRC, freenode, #hurd, 2013-07-07
+
+ <hacklu> how fork() goes?
+ <pinotree> see sysdeps/mach/hurd/fork.c in glibc' sources
+ <hacklu> when the father has two thread( main thread and the signal thead),
+ if the father call fork, then the child inmediatelly call exev() to
+ change the excute file. how many thread in the children?
+ <hacklu> For instance, the new execute file also have two thread.
+ <hacklu> will the exev() destroyed two threads and then create two new?
+ <hacklu> s/exev()/excv()
+ <hacklu> s/exev()/exec() :)
+
+ <hacklu> what libhurduser-2.13.so does?
+ <hacklu> where can I find this source?
+ <pinotree> contains all the client stubs for hurd-specific RPCs
+ <pinotree> it is generated and built automatically within the glibc build
+ process
+
+ <hacklu> and what is the "proc" server?
+ <pinotree> what handles in user spaces the processes
+ <hacklu> so if I call proc_wait_request(), I will go into the
+ S_proc_wait_reply?
+ <hacklu> thanks, I have found that.
+
+
+# IRC, freenode, #hurd, 2013-07-08
+
+ <hacklu> hi, this is my weekly
+ report. http://hacklu.com/blog/gsoc-weekly-report3-137/
+ <hacklu> this week I have met a lot of obstacles. And I am quite desired to
+ participate in this meeting.
+ <tschwinge> hacklu: So from your report, the short version is: you've been
+ able to figure out how the things work that you were looking at (good!),
+ and now there are some new open questions that you're working on now.
+ <tschwinge> hacklu: That sounds good. We can of course try to help with
+ your open questions, if you're stuck figuring them out on your own.
+ <hacklu> tschwinge: the most question is: what is the proc server? why need
+ to call proc_get_reqeust() before the mach_msg()?
+ <hacklu> and Is there exist any specific running sequence between father
+ and child task after fork()? And I found the inferior always call the
+ trace_me() in the same time(the trace me printf always in the same line
+ of the output log). which I have post in my report.
+ <tschwinge> hacklu: The fork man-page can provide a high-level answer to
+ your Q3: »The child process is created with a single thread—the one that
+ called fork(). The entire virtual address space of the parent is
+ replicated in the child, including the states of mutexes, condition
+ variables, and other pthreads objects [...]«
+ <tschwinge> hacklu: What happens in GNU Hurd is that the signal thread is
+ also "cloned" (additionally to the thread which called fork), but then it
+ (the signal thread) is re-started from the beginning. (So this is very
+ much equivalent to creating a new signal thread.)
+ <tschwinge> hacklu: Then, upon exec, a new memory image is created/loaded,
+ replacing the previous one. [glibc]/sysdeps/mach/hurd/execve.c. What
+ actually happens with the existing thread (in particular, the signal
+ thread) I don't know off-hand. Then answer is probably found in
+ [glibc]/hurd/hurdexec.c -- and perhaps some code of the exec server
+ ([hurd]/exec/).
+ <hacklu> I have checked the status of my regiter mail to FSF. it says it
+ had arrived in USA.
+ <tschwinge> hacklu: OK, good.
+ <tschwinge> hacklu: This is some basic information about the observer_*
+ functions is GDB:
+ http://sourceware.org/gdb/current/onlinedocs/gdbint/Algorithms.html#index-notifications-about-changes-in-internals-57
+ »3.10 Observing changes in gdb internals«.
+ <hacklu> tschwinge: not too clear. I will think this latter. and what is
+ the proc server?
+ <teythoon> hacklu: /hurd/proc, maps unix processes to mach threads afaiui
+ <hacklu> teythoon: question is, the mach_msg() will never return unless I
+ called proc_wait_request() first.
+ <teythoon> hacklu: sorry, I've no idea ;)
+ <hacklu> teythoon: :)
+ <tschwinge> hacklu: I will have to look into that myself, too; don't know
+ the answer off-hand.
+ <tschwinge> hacklu: In your blog you write proc_get_request -- but such a
+ functions doesn't seems to exist?
+ <hacklu> tschwinge: s/proc_get_request/proc_wait_request called in
+ gun_wait() [gnu-nat.c]
+ <tschwinge> hacklu: Perhaps the wait man-page's description of WUNTRACED
+ gives a clue: »also return if a child has stopped [...]«. But it also to
+ me is not yet clear, how this relates to the mach_mag call, and how the
+ proc server exactly is involved in it.
+ <tschwinge> I'm reading various source code files.
+ <tschwinge> At least, I don't undestand why it is required for an exception
+ to be forwarded.
+ <hacklu> if I need to read the proc server source code?
+ <tschwinge> I can see how it to become relevant for the case that GDB has
+ to be informed that the debugee has exited normally.
+ <tschwinge> hacklu: Yeah, probably you should spend some time with that, as
+ it will likely help to get a clearer picture of the situation, and is
+ relevant for other interactions in GDB, too.
+ <tschwinge> hacklu: By the way, if you find that pieces of the GDB source
+ code (especially the Hurd files of it) are insufficiently documented,
+ it's a very good idea, once you have figured out something, to add more
+ source code comments to the existing code. Or writed these down
+ separately, if that is easier.
+ <hacklu> which is the proc server? hurd/exec ?
+ <hacklu> that ok, I already comment things on my notes.
+ <tschwinge> hacklu: [Hurd]/proc/
+ <tschwinge> hacklu: And [Hurd]/hurd/process*.defs
+ <hacklu> got it
+ <tschwinge> hacklu: I'll have to experiment a bit with your HDebugger
+ example, but I'm out of time right now, sorry. Will continue later.
+ <hacklu> tschwinge: yep, the HDebugger has a problem, if you put the
+ sleep() after the printf in the just_print(), thing will hang.
+ <hacklu> tschwinge: and I am a little curious about how do you find my
+ code? I dont't remember I have mentioned that :)
+ <hacklu> tschwinge: I have post my gihub link in the last week report, I
+ found that.
+ <tschwinge> hacklu: That's how I found it, yes.
+ <hacklu> tschwinge: :)
+
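+tschwinge's description of fork above (the child gets the calling thread plus
+a re-started signal thread) is easy to check from a small program: count the
+threads Mach reports for the task before and after forking.  A sketch, with
+minimal error handling:
+
+    #include <mach.h>
+    #include <stdio.h>
+    #include <sys/wait.h>
+    #include <unistd.h>
+
+    static mach_msg_type_number_t
+    count_threads (void)
+    {
+      thread_array_t threads;
+      mach_msg_type_number_t count = 0, i;
+
+      if (task_threads (mach_task_self (), &threads, &count))
+        return 0;
+      /* Release the thread ports and the array the kernel handed us.  */
+      for (i = 0; i < count; i++)
+        mach_port_deallocate (mach_task_self (), threads[i]);
+      vm_deallocate (mach_task_self (), (vm_address_t) threads,
+                     count * sizeof *threads);
+      return count;
+    }
+
+    int
+    main (void)
+    {
+      printf ("parent: %u threads\n", count_threads ());
+      pid_t pid = fork ();
+      if (pid == 0)
+        {
+          /* Both the forking thread and the signal thread should show up.  */
+          printf ("child:  %u threads\n", count_threads ());
+          _exit (0);
+        }
+      waitpid (pid, NULL, 0);
+      return 0;
+    }
+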
+
+# IRC, freenode, #hurd, 2013-07-14
+
+ <hacklu> hi. what is a process's msgport?
+ <hacklu> And where can I find the msg_sig_post_untraced_request()?
+ <hacklu> (msg_sig_post* in [hurd]/hurd/msg_defs)
+ <hacklu> this is my debugger demo code
+ https://github.com/hacklu/HDebugger.git use make test to run the demo. I
+ put a breakpoint before the second printf in hello_world(inferior
+ program). but I can't resume execution from that.
+ <hacklu> could somebody give me some suggestions? thanks so much.
+ <teythoon> hacklu: % make test
+ <teythoon> make: *** No rule to make target `exc_request_S.c', needed by
+ `all'. Stop.
+ <hacklu_> teythoon: updated, forget to git add that file .
+ <teythoon> hacklu_: cool, seems to work now
+ <teythoon> will look into this tomorrow :)
+ <hacklu_> exit
+ <hacklu_> teythoon: not work. the code can,t resume from a breakpoint
+
+
+# IRC, freenode, #hurd, 2013-07-15
+
+ <hacklu> hi, this is my weekly
+ report. http://hacklu.com/blog/gsoc-weekly-report4-148/
+ <hacklu> sadly to unsolve the question of resume from breakpoint.
+ <teythoon> hacklu: have you tried to figure out what gdb does to resume a
+ process?
+ <hacklu> teythoon: hi. em, I have tried, but haven't find the magic in gdb
+ yet.
+ <teythoon> have you tried rpctrace'ing gdb?
+ <hacklu> no, rpctrace has too many noise. I turned on the debug in gdb.
+ <hacklu> I don't want rpctrace start gdb as its child task. if it can
+ attach at some point instead of at start
+ <teythoon> hacklu: you don't need to use gdb interactively, you could pipe
+ some commands to it
+ <hacklu> teythoon: that sounds a possible way. I am try it, thank you
+ <hacklu> youpi: gdb can't work correctlly with rpctrace even in batch
+ mode.
+ <hacklu> get something like this "rpctrace: get an unknown send right from
+ process 2151"
+ <youpi> hacklu: well, ideally, fix rpctrace );
+ <youpi> ;)
+ <youpi> hacklu: but you can also as on the list, perhaps somebody knows
+ what you need
+ <hacklu> ok.
+ <hacklu> or I should debug gdb more deeply.
+ <youpi> do both
+ <youpi> so either of them may win first
+
+ <hacklu> braunr: I have found that, if there is no exception appears, the
+ signal thread will not be createed. Then there is only one thread in the
+ task.
+
+
+# IRC, freenode, #hurd, 2013-07-17
+
+ <hacklu__> braunr: ping
+ <braunr> hacklu__: yes ?
+ <hacklu__> I have reply your email
+ <braunr> i don't understand
+ <braunr> "I used this (&_info)->suspend_count to get the sc value."
+ <braunr> before the thread_info call ?
+ <hacklu__> no, after the call
+ <braunr> but you have a null pointer
+ <braunr> the info should be returned in info, not _info
+ <hacklu__> strange thing is the info is a null pointer. but _info not
+ <braunr> _info isn't a pointer, that's why
+ <braunr> the kernel will use it if the data fits, which is usually the case
+ <hacklu__> in the begin , the info=&_info.
+ <braunr> and it will dynamically allocate memory if it doesn't
+ <braunr> yes
+ <braunr> info should still have that value after the call
+ <hacklu__> but the call had change it. this is what I can;t understand.
+ <braunr> are you completely sure err is 0 on return ?
+ <hacklu__> since the parameter is a pointer to pointer, the thread_info can
+ change it , but I don't think it is a good ideal to set it to null
+ pointer without any err .
+ <hacklu__> yes. i am sure
+ <braunr> info_len is wrong
+ <braunr> it should be the number of integers in _info
+ <braunr> i.e. sizeof(_info) / sizeof(unsigned int)
+ <braunr> i don't think that's the problem though
+ <braunr> yes, THREAD_BASIC_INFO_COUNT is already exactly that
+ <braunr> hm not exactly
+ <braunr> yes, exactly in fact
+ <hacklu__> I try to set it by hand, not use the macro.
+ <braunr> the macro is already defined as #define THREAD_BASIC_INFO_COUNT
+ (sizeof(thread_basic_info_data_t) / sizeof(natural_t))
+ <hacklu__> the info_len is 13. I checked.
+ <braunr> so, i said something wrong
+ <braunr> the call doesn't reallocate thread_info
+ <braunr> it uses the provided storage, nothing else
+ <braunr> yes, your call is wrong
+ <braunr> use thread_info (thread->port, THREAD_BASIC_INFO, (int *) info,
+ &info_len);
+ <hacklu__> em. thread_info (thread->port, THREAD_BASIC_INFO, (int *) &info,
+ &info_len);
+ <braunr> &info would make the kernel erase the memory where info (the
+ pointer) was stored
+ <braunr> info, not &info
+ <braunr> or &_info directly
+ <braunr> i don't see the need for an intermediate pointer here
+ <braunr> ideally, avoid the cast
+ <hacklu__> but in gnu-nat.c line 3338, it use &info.
+ <braunr> use a union with both thread_info_data_t and
+ thread_basic_info_data_t
+ <braunr> well, try it my way
+ <braunr> i think they're wrong
+ <hacklu__> ok, you are right, use info it is ok. the value is the same as
+ &_info after the call.
+ <hacklu__> but the suspend_count is zero again.
+ <braunr> check the rest of the result to see if it's consistent
+ <hacklu__> I think this line need a patch.
+ <hacklu__> what you mean the rest of the result?
+ <braunr> the thread info
+ <braunr> run_state, sleep_time, creation_time
+ <braunr> see if they make sense
+ <hacklu__> ok, I try to dump it
+ <braunr> bbl
+ <hacklu__> braunr: thread [118] suspend_count=0
+ <hacklu__> run_state=3, flags=1, sleep_time=0,
+ creation_time.second=1374079641
+ <hacklu__> something like this, seems no problems.
+
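+For reference, the calling convention braunr describes: thread_info wants the
+address of the info structure itself (cast to thread_info_t), not the address
+of a pointer to it, and the count is given in units of integers.  A small
+sketch that prints the basic info for every thread of the current task; the
+run_state values correspond to the TH_STATE_* constants from
+<mach/thread_info.h>:
+
+    #include <mach.h>
+    #include <mach/thread_info.h>
+    #include <stdio.h>
+
+    static void
+    print_basic_info (thread_t thread)
+    {
+      struct thread_basic_info info;
+      mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
+      kern_return_t err = thread_info (thread, THREAD_BASIC_INFO,
+                                       (thread_info_t) &info, &count);
+      if (err)
+        return;
+      printf ("thread %u: run_state=%d suspend_count=%d sleep_time=%d\n",
+              (unsigned int) thread, info.run_state,
+              info.suspend_count, info.sleep_time);
+    }
+
+    int
+    main (void)
+    {
+      thread_array_t threads;
+      mach_msg_type_number_t n, i;
+
+      if (task_threads (mach_task_self (), &threads, &n) == 0)
+        for (i = 0; i < n; i++)
+          print_basic_info (threads[i]);
+      return 0;
+    }
+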
+
+# IRC, freenode, #hurd, 2013-07-18
+
+ <hacklu__> how to get the thread state from TH_STATE_WAITING to
+ TH_STATE_RUNNING
+ <braunr> hacklu__:
+ http://www.gnu.org/software/hurd/gnumach-doc/Thread-Execution.html#Thread-Execution
+ <braunr> hacklu__: ah waiting
+ <braunr> hacklu__: this means the thread is waiting for an event
+ <braunr> so probably waiting for a message
+ <braunr> or an internal kernel event
+ <hacklu__> braunr: so I need to send it a message. I think I maybe forget
+ to send some reply message.
+ <braunr> hacklu__: i'm really not sure about those low level details
+ <braunr> confirm before doing anything
+ <hacklu__> the gdb has called msg_sig_post_untraced_request(), I don't get
+ clear about this function, I just call it as the same, maybe I am wrong .
+ <hacklu__> how will if I send a CONT to the stopped process? maybe I should
+ try this.
+ <hacklu__> when the inferior is in waiting
+ status(TH_STATE_WAITING,suspend_count=0), I use kill to send a CONT. then
+ the become(TH_STATE_STOP,suspend_count=1). when I think I am near the
+ success,I call thread_resume(),inferior turn out to be (TH_STATE_WAITING,
+ suspend_count=0).
+ <braunr> so yes, probably waiting for a message
+ <hacklu__> braunr: after send a CONT to the inferior, then send a -9 to the
+ debugger, the inferior continue!!!
+ <braunr> probably because it was notified there wasn't any sender any more
+ <hacklu__> that's funny, I will look deep into thread_resume and kill
+ <braunr> (gdb being the sender here)
+ <hacklu__> in hurd, when gdb attach a inferior, send signal to the
+ inferior, who will get the signal first? the gdb or the inferior?
+ <hacklu__> quite differnet with linux. seems the inferior get first
+ <braunr> do you mean gdb catches its own signal through ptrace on linux ?
+ <hacklu__> kkk
+ <braunr> ?
+
+
+# IRC, freenode, #hurd, 2013-07-20
+
+ <hacklu> braunr: yeah, on Linux the gdb catch the signal from inferior
+ before the signal handler. And that day my network was broken, I can't
+ say goodbye to you. sorry for that.
+
+
+# IRC, freenode, #hurd, 2013-07-22
+
+ <hacklu> hi all, this is my weekly
+ report. http://hacklu.com/blog/gsoc-weekly-report5-152/
+ <teythoon> good to hear that you got the resume issue figured out
+ <hacklu> teythoon: thanks :)
+ <teythoon> hacklu: so your next step is to port gdbserver to hurd?
+ <hacklu> yep, I am already begin to.
+ <hacklu> before the mid-evaluate, I must submit something. I am far behind
+ my personal expections
+ <tschwinge> hacklu: You've made great progress! Sorry, for not being able
+ to help you very much: currently very busy with work. :-|
+ <tschwinge> hacklu: Working on gdbserver now is fine. I understand you
+ have been working on HDebugger to get an understanding of how everyting
+ works, outside of the huge GDB codebase. It's of course fine to continue
+ working on HDebugger to test things, etc., and that also counts very much
+ for the mid-term evaluation, so nothing to worry about. :-)
+ <hacklu> but I have far away behind my application on GSOC. I haven't
+ submit any patches. is it ok?
+ <tschwinge> hacklu: Don't worry. Before doing the actual work, things
+ always look much simpler than they will be. So I was expecting/planning
+ for that.
+ <tschwinge> The Hurd system is complex, with non-trivial and sometimes
+ asynchronous communication between the different components, and so it
+ takes some time to get an understanding of all that.
+ <hacklu> yes, I haven't get all clear about the signal post. that's too
+ mazy.
+ <tschwinge> hacklu: It surely is, yes.
+ <hacklu> tschwinge: may you help me to understand the msg_sig_post(). I
+ don't want to understand all details now, but I want to get the _right_
+ understanding of the gerneral.
+ <hacklu> as I have mentioned on my weekly report, gdb is listening on the
+ inferior's exception port, then gdb post a signal to that port. That
+ says: gdb post a message to herself, and handle it. is this right?
+ <hacklu> tschwinge: [gdb]/gdb/gnu-nat.c (line 1371), and
+ [glibc]/hurd/hurdsig.c(line 1390)
+ <tschwinge> hacklu: My current understanding is that this is a "real"
+ signal that is sent to the debugged process' signal thread (msgport), and
+ when that process is resumed, it will process that signal.
+ <tschwinge> hacklu: This is different from the Mach kernel sending an
+ exception signal to a thread's exception port, which GDB is listening to.
+ <tschwinge> Or am I confused?
+ <hacklu> is the msgport equal the exception port?
+ <hacklu> in my experience, when the thread haven't cause a exception, the
+ signal thread will not be created. after the exception occured, the
+ signal thread is come out. so somebody create it, who dose? the mach
+ kernel?
+ <tschwinge> hacklu: My understanding is that the signal thread would always
+ be present, because it is set up early in a process' startup.
+ <hacklu> but when I call task_threads() before the exception appears, only
+ on thread returned.
+ <tschwinge> "Interesting" -- another thing to look into.
+ <tschwinge> hacklu: Well, you must be right: GDB must also be listening to
+ the debugged process' msgport, because otherwise it wouldn't be able to
+ catch any signals the process receives. Gah, this is all too complex.
+ <hacklu> tschwinge: that's maybe not. gdb listening on the task's exception
+ port, and the signal maybe handle by the signal thread if it could
+ handle. otherwise the signal thread pass the exception to the task's
+ exception port where gdb catched.
+ <tschwinge> hacklu: Ah, I think I now get it. But let me first verify...
+ ;-)
+
+ <hacklu> something strange. I have write a program to check whether create
+ signal threads at begining, the all created!
+ <hacklu> tschwinge: this is my test code and
+ result. http://pastebin.com/xtM6DUnG
+    cat test.c
+    #define _GNU_SOURCE 1
+    #include <stdlib.h>
+    #include <stdio.h>
+    #include <errno.h>
+    #include <mach.h>
+    #include <mach_error.h>
+    int main(int argc,char** argv)
+    {
+        mach_port_t task_port;
+        thread_array_t threads[5];
+        mach_msg_type_number_t num_threads[5];
+        error_t err;
+        task_port = mach_task_self();
+        int i;
+        int j;
+        for(i=0;i<5;i++)
+            if(task_port){
+                err = task_threads(task_port,&threads[i],&num_threads[i]);
+                if(err)
+                    printf("err\n");
+            }
+        for(i=0;i<5;i++){
+            printf("===============\n");
+            printf("has %d threads now\n",num_threads[i]);
+            for(j=0;j<num_threads[i];j++)
+                printf("thread[%d]=%d\n",j,threads[i][j]);
+        }
+        return 0;
+    }
+
+
+    and the output
+    ./a.out
+    ===============
+    has 2 threads now
+    thread[0]=87
+    thread[1]=97
+    ===============
+    has 2 threads now
+    thread[0]=87
+    thread[1]=97
+    ===============
+    has 2 threads now
+    thread[0]=87
+    thread[1]=97
+    ===============
+    has 2 threads now
+    thread[0]=87
+    thread[1]=97
+    ===============
+    has 2 threads now
+    thread[0]=87
+    thread[1]=97
+ <hacklu> tschwinge: the result is different with HDebugger case.
+
+ <tschwinge> hacklu: It is my understanding that the two sig_post_untraced
+ RPC calls in inf_signal indeed are invoked on the real msgport (signal
+ thread) if the debugged process.
+ <tschwinge> That port is retrieved via the
+ INF_MSGPORT_RPC/INF_RESUME_MSGPORT_RPC macro, which invoked
+ proc_getmsgport on the proc server, and that will return (unless
+ overridden by proc_setmsgport, but that isn't done in GDB) the msgport as
+ set by [glibc]/hurd/hurdinit.c:_hurd_new_proc_init or _hurd_setproc.
+ <tschwinge> inf_signal is called from gnu_resume, which is via
+ [target_ops]->to_resume is called from target.c:target_resume, which is
+ called several places, for example infrun.c:resume which is used to a)
+ just resume the debugged process, or b) resume it and have it handle a
+ Unix signal (such as SIGALRM, or so), when using the GDB command »signal
+ SIGALRM«, for example.
+ <tschwinge> So such a signal would then not be intercepted by GDB itself.
+ <tschwinge> By the way, this is all just from reading the code -- I hope I
+ got it all right.
+
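+A rough illustration of the signal path tschwinge describes: ask the proc
+server for the target's message port (proc_getmsgport) and post the signal to
+it with msg_sig_post, which is essentially what glibc's kill does.  The
+refport passed here is the target's task port, so this only works with
+sufficient authority over the target; the RPC stubs come from libhurduser.
+Sketch only:
+
+    #define _GNU_SOURCE 1
+    #include <hurd.h>
+    #include <hurd/process.h>
+    #include <hurd/msg.h>
+    #include <error.h>
+    #include <signal.h>
+    #include <stdlib.h>
+
+    int
+    main (int argc, char **argv)
+    {
+      if (argc < 2)
+        return 1;
+      pid_t pid = atoi (argv[1]);
+      process_t proc = getproc ();
+      task_t task = pid2task (pid);	/* also serves as proof of authority */
+      mach_port_t msgport;
+
+      error_t err = proc_getmsgport (proc, pid, &msgport);
+      if (err)
+        error (1, err, "proc_getmsgport");
+      /* Deliver SIGCONT through the target's signal (msg) thread.  */
+      err = msg_sig_post (msgport, SIGCONT, 0, task);
+      if (err)
+        error (1, err, "msg_sig_post");
+      return 0;
+    }
+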
+ <tschwinge> Another thing: In Mach 3 Kernel Principles, the standard
+ sequence described on pages 22, 23 is thread_suspend, thread_abort,
+ thread_set_state, thread_resume, so you should probably do that in
+ HDebugger too, and not call thread_set_state before.
+ <tschwinge> I would hope the GDB code also follows the standard sequence?
+ Can you please check that?
+
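+The standard sequence from the book, as a rough C sketch (x86; the flavor and
+state structure names are the ones GDB's gnu-nat uses on GNU Mach).  This is
+illustrative only, with no error handling:
+
+    #include <mach.h>
+    #include <mach/thread_status.h>
+
+    /* Stop THREAD, modify its program counter, and let it run again.  */
+    static void
+    set_thread_pc (thread_t thread, unsigned int new_eip)
+    {
+      struct i386_thread_state state;
+      mach_msg_type_number_t count = i386_THREAD_STATE_COUNT;
+
+      thread_suspend (thread);	/* 1. stop it */
+      thread_abort (thread);	/* 2. abort any interrupted RPC so the
+                                   register state is clean */
+      thread_get_state (thread, i386_REGS_SEGS_STATE,
+                        (thread_state_t) &state, &count);
+      state.eip = new_eip;	/* 3. modify the state */
+      thread_set_state (thread, i386_REGS_SEGS_STATE,
+                        (thread_state_t) &state, i386_THREAD_STATE_COUNT);
+      thread_resume (thread);	/* 4. let it run again */
+    }
+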
+ <tschwinge> The one thing I'm now confused about is where/how GDB
+ intercepts the standard setup (probably in glibc's signaling mess?) so
+ that it receives any signals raised in the debugged process.
+ <tschwinge> But I'll have to continue later.
+
+ <hacklu___> tschwinge: thanks for your detail answers. I don't realize that
+ the gnu_resume will resume for handle a signal, much thanks for point
+ this:)
+ <hacklu___> tschwinge: I am not exactly comply with <Mach 3 kernel
+ principles> when I call thread_set_state. but I have called a
+ task_suspend before. I think it's not too bad:)
+ <tschwinge> hacklu___: Yes, but be aware that gnu_resume is only relevant
+ if a signal is to be forwarded to the debugged process (to be handled
+ there), but not for the case where GDB intercepts the signal (such as
+ SIGSEGV), and handles it itself without then forwarding it to the
+ application. See the »info signals« GDB command.
+ <hacklu___> I also confused about when to start the signal thread. I will
+ do more experiment.
+ <hacklu___> I have found this: when the inferior is stop at a breakpoint, I
+ use kill to send a CONT to it, the HDebugger will get this message who
+ listening on the exception port.
+
+
+# IRC, freenode, #hurd, 2013-07-28
+
+ <hacklu_> how to understand the rpctrace output?
+    <hacklu_> like this. 142<--143(pid15921)->proc_mark_stop_request (19 0)
+      125<--127(pid-1)->msg_sig_post_request (20 5 task108(pid15919));
+ <hacklu_> what is the (pid-1)? the kernel?
+ <teythoon> 1 is /hurd/init
+ <hacklu_> pid-1 not means minus 1?
+ <teythoon> ah, funny, you're right... I dunno then
+ <teythoon> 2 is the kernel though
+ <hacklu_> the 142<--143 is port name?
+ <teythoon> could very well be, but I'm not sure, sorry
+ <hacklu_> the number must be the port name.
+ <teythoon> anyone knows why /hurd/init does not get dead name notifications
+ for /hurd/exec like it does for any other essential server?
+ <teythoon> as far as I can see it successfully asks for them
+ <teythoon> about rpctrace, it poses as the kernel for its children, parses
+ and relays any messages sent over the childrens message port, right?
+
+
+# IRC, freenode, #hurd, 2013-07-29
+
+ <hacklu_> hi. this is my weekly
+ report. http://hacklu.com/blog/gsoc-weekly-report6-156/
+ <teythoon> hacklu_: the inferior voluntarily stops itself if it gets a
+ signal and notifies its tracer?
+ <hacklu_> yes
+ <teythoon> what if it chose not to do so? undebugable program?
+ <hacklu_> debugged program will be set an flag so called
+ hurdsig_traced. normal program will handle the signal by himself.
+ <hacklu_> in my env, I found that when GDB attach a running program, gdb
+ will not catch the signal send to the program. May help me try it?
+ <teythoon> it doesn't? I'll check...
+ <teythoon> hacklu_: yes, you're right
+ <hacklu_> you can just gdb a loop program, and kill -CONT to it. If I do
+ this I will get "Can't wait for pid 12332:NO child processes" warning.
+ <teythoon> yes, I noticed that too
+ <teythoon> does gdb reparent the tracee?
+ <hacklu_> I don't think this is a good behavior. gdb should get inferior's
+ signal
+ <teythoon> absolutely
+ <hacklu_> In linux it does, not sure about hurd. but I think it should.
+ <teythoon> definitively. there is proc_child in process.defs, but that may
+ only be used once to set the parent of a process
+ <hacklu_> gdb doesn't set the inferior as its child process if attached a
+ running procss in HURD.
+
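+For contrast with attaching, the trace-me startup path that keeps coming up
+in this log (the child marks itself traced, which on the Hurd sets the
+_hurdsig_traced flag via ptrace, then execs the program; the parent gets hold
+of its task port) looks roughly like this.  A sketch only; pid2task requires
+the caller to have authority over the child:
+
+    #include <sys/ptrace.h>
+    #include <unistd.h>
+    #include <stdio.h>
+    #include <hurd.h>
+
+    int
+    main (int argc, char **argv)
+    {
+      if (argc < 2)
+        return 1;
+      pid_t pid = fork ();
+      if (pid == 0)
+        {
+          ptrace (PTRACE_TRACEME, 0, 0, 0);	/* ask to be traced */
+          execv (argv[1], &argv[1]);		/* becomes the inferior */
+          _exit (127);
+        }
+      task_t task = pid2task (pid);
+      printf ("inferior %d has task port %u\n", pid, (unsigned int) task);
+      /* ... from here, set an exception port on TASK and enter a mach_msg
+         loop, as in the earlier sketches ...  */
+      return 0;
+    }
+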
+ <tschwinge> hacklu_: So you figured out this tracing/signal stuff. Great!
+ <hacklu_> tschwinge: Hi. not exactly.
+ <hacklu_> as I have mentioned, gdb can't get signal when attach to a
+ running process.
+ <hacklu_> I also want to know how to build glibc in hurd. I have got this "
+ relocation error: ./libc.so: symbol _dl_find_dso_for_object, version
+ GLIBC_PRIVATE not defined in file ld.so.1 with link time reference" when
+ use LD_PRELOAD=./my_build_glibc/libc.so
+ <tschwinge> hacklu: You can't just preload the new libc.so, but you'll also
+ need to use the new ld.so. Have a look at [glibc-build]/testrun.sh for
+ how to invoke these properly. Or, link with
+ »-Wl,-dynamic-linker=[glibc-build]/elf/ld.so,-rpath,[glibc-build]:[glibc-build]/elf
+ -L [glibc-build] -L [glibc-build]/elf«. If using the latter, I suggest
+ to also add »-Wl,-t« to verify that you're linking against the correct
+ libraries, and »ldd
+ <tschwinge> [executable]« to verify that [€xecutable] will load the correct
+    <tschwinge> [executable]« to verify that [executable] will load the correct
+ <hacklu> I will try that. Also, I can't find this call,
+ pthread_cond_broadcast(), which will be called in proc_mark_stop
+ <tschwinge> hacklu: Oh, right, you'll also need to add libpthread (I think
+ that's the directory name?) to the rpath and -L commands.
+ <hacklu> is libpthread a part of glibc or hurd?
+ <pinotree> glibc
+ <NlightNFotis> hacklu: it is a different repository available here
+ http://git.savannah.gnu.org/cgit/hurd/libpthread.git/
+ <hacklu> tschwinge: thanks for that, but I don't think I need help with
+ the compiler error now; it just says some C file is missing. I will look
+ into the Makefile to verify.
+ <NlightNFotis> but I think it's a part of glibc as a whole
+ <tschwinge> hacklu: OK.
+ <tschwinge> libpthread is/was a stand-alone package and library, but in
+ Debian GNU/Hurd it is nowadays integrated into glibc's build process.
+ <hacklu> NlightNFotis: thanks. I only added the hurd, glibc, gdb, and mach
+ code to my cscope file; it seems I need to add libpthread too.
+ <tschwinge> hacklu: If you use the Debian glibc package, our libpthread
+ will be in the libpthread subdirectory.
+ <tschwinge> Ignore nptl, which is used for the Linux kernel.
+ <hacklu> tschwinge: BTW, I have found that, to continue the inferior from a
+ breakpoint, there is no need to call msg_sig_post_untraced; just calling
+ thread_abort and thread_resume is already enough.
+ <hacklu> I get the glibc from http://git.savannah.gnu.org/cgit/hurd.
+ <tschwinge> hacklu: That sounds about right, because you want the inferior
+ to continue normally, instead of explicitly sending a (Unix) signal to
+ it.
+ <tschwinge> hacklu: I suggest you use: »apt-get source eglibc« on your Hurd
+ system.
+ <tschwinge> hacklu: The Savannah repository does not yet have libpthread
+ integrated. I have this on my TODO list...
+ <hacklu> tschwinge: no, apt-get source doesn't work on my Hurd. I got all
+ the code from git clone ***
+ <pinotree> you most probably lack the deb-src entry in your sources.list
+ <tschwinge> hacklu: Do you have deb-src lines in /etc/apt/sources.list? Or
+ how does it fail?
+ <hacklu> tschwinge: I have deb-src lines. and apt-get complain that: E:
+ Unable to find a source package for eglibc or E: Unable to find a source
+ package for glibc
+ <youpi> hacklu: which deb-src lines do you have?
+ <hacklu> and a piece of my sources.list: deb
+ http://ftp.debian-ports.org/debian unreleased main deb-src
+ http://ftp.debian-ports.org/debian unreleased main
+ <youpi> you also need a deb-src line with the main archive
+ <youpi> deb-src http://cdn.debian.net/debian unstable main
+ <tschwinge> hacklu: Oh, hmm. And you did run »apt-get update« before?
+ That aside, there also is <http://snapshot.debian.org/package/eglibc/>
+ that you can use. You'll need the *.dsc and *.debian.tar.xz files
+ corresponding to your version of glibc, and the *.orig.tar.xz file. And
+ then run »dpkg-source -x *.dsc«.
+ <tschwinge> The Debian snapshot is often very helpful if you need source
+ packages that are no longer in the main Debian repository.
+ <youpi> or simply running dget on the dsc url
+ <tschwinge> Oh. Good to know.
+ <youpi> e.g. dget
+ http://cdn.debian.net/debian/pool/main/e/eglibc/eglibc_2.17-7.dsc
+ <hacklu> the network is slow, and I am running apt-get update.
+ <youpi> I will be away from this evening until sunday, too
+ <hacklu> what's the main difference between the source sites?
+ <hacklu> does dget mean wget?
+ <pinotree> no
+ <hacklu> not exist in linux?
+ <pinotree> it does, in devscripts
+ <pinotree> it's a debian tool
+ <hacklu> oh, yes, I have installed devscripts.
+ <hacklu> I have got the libpthread code, thanks.
+
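+The continue-from-a-breakpoint sequence hacklu describes above (thread_abort
+followed by thread_resume, without msg_sig_post_untraced) boils down to two
+plain Mach calls on the inferior's thread port.  A minimal sketch, assuming
+`thread' is a send right to the suspended thread (for example one of the
+ports returned by task_threads); error handling is reduced to returning the
+error code:
+
+    #include <mach.h>
+
+    /* Resume a thread stopped at a breakpoint: first interrupt whatever
+       kernel operation (such as a mach_msg) it may be blocked in, then
+       undo the suspension so it runs again.  */
+    static kern_return_t
+    continue_thread (thread_t thread)
+    {
+      kern_return_t err;
+
+      err = thread_abort (thread);
+      if (err != KERN_SUCCESS)
+        return err;
+
+      return thread_resume (thread);
+    }
+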
+ <braunr> teythoon: the simple fact that this msg thread exists to receive
+ requests and that these requests are sent by ps and procfs is a potential
+ DoS
+ <teythoon> braunr: but does that mean that on Hurd a process can prevent a
+ debugger from intercepting signals?
+ <braunr> teythoon: yes
+ <braunr> that's not a problem for interactive programs
+ <braunr> it's part of the hurd design that programs have limited trust in
+ each other
+ <braunr> a user can interrupt his debugger if he sees no activity
+ <braunr> that's more of a problem for non interactive system stuff like
+ init scripts
+ <braunr> or procfs
+ <hacklu> why can't gdb get the inferior's signals if it attaches to a
+ running process?
+ <braunr> hacklu: try to guess
+ <hacklu> braunr: it is not a reasonable thing. I always thought it should
+ catch the signal.
+ <braunr> hacklu: signals are a unix thing built on top of mach
+ <braunr> hacklu: think in terms of ports
+ <braunr> all communication on the hurd goes through ports
+ <hacklu> but when I use gdb to start a process and debug it, this way, gdb
+ can catch the signal
+ <braunr> hacklu: my guess is :
+ <braunr> when starting a process, gdb can act as a proxy, much like
+ rpctrace
+ <braunr> when attaching, it can't
+ <hacklu> braunr: ah, my question should be asked like this: why can't gdb
+ set the inferior as its child process when attaching to it? Or can it not?
+ <braunr> hacklu: i'm not sure, the proc server is one of the parts i know
+ the less
+ <braunr> but again, i guess there is no facility to update the msg port of
+ a process in the proc server
+ <braunr> check that before taking it as granted
+ <hacklu> braunr: aha, I always thought you knew everything :)
+ <tschwinge> braunr: There is: setmsgport or similar.
+ <braunr> if there is one, gdb doesn't use it
+ <tschwinge> hacklu: That is a good question -- I can't answer it off-hand,
+ but it might be possible (by setting the tracing flag, and such things).
+ Perhaps it's just a GDB bug, which omits to do that. Perhaps just a
+ one-line code change, perhaps not. That's a new bug (?) report that we
+ may want to have a look at later on.
+ <tschwinge> hacklu: But also note, this new problem is not really related
+ to your gdbserver work -- but of course you're fine to have a look at it
+ if you'd like to.
+ <hacklu> I just wanted to ask whether this is normal behavior. It is
+ related to my gdbserver work, as gdbserver also needs to attach to a
+ running process...
+ <braunr> gdbserver can start a process just like gdb does
+ <braunr> you may want to focus on that first
+ <tschwinge> Yes.
+ <tschwinge> Attaching to processes that are already running is, I think,
+ always more complicated compared to the case where GDB/gdbserver has
+ complete control about the inferior right from the beginning.
+ <hacklu> yes, I am only focusing on the start case; I haven't researched
+ the attach way yet.
+ <tschwinge> hacklu: That's totally fine. You can just say that attaching
+ to processes is not supported yet.
+ <hacklu> that sounds good :)
+ <tschwinge> There will likely be more things in gdbserver that you won't be
+ able to easily support, so it's fine to do it step-by-step.
+ <tschwinge> And then later add more features incrementally.
+ <tschwinge> That's also easier for reviewing the patches.
+
+ <hacklu> and one more question I asked yesterday: what does the rpctrace
+ output (pid-1) mean?
+ <tschwinge> hacklu: Another thing I can't tell off-hand. I'll try to look
+ it up.
+ <teythoon> hacklu, tschwinge: my theory is that it is in fact an error
+ message, maybe the proc server did not know a pid for the task
+ <braunr> hacklu: utsl
+ <hacklu> tschwinge: to save your time, I will look at the code myself; I
+ don't think this is a really hard question that needs you to help me by
+ reading the source code.
+ <tschwinge> teythoon, hacklu: Yes, from a quick inspection it looks like
+ task2pid returning a -1 PID -- but I can't tell yet what that is supposed
+ to mean, if it's an actual bug, or just means there is no data
+ available, or similar.
+ <hacklu> braunr: utsl??
+ <tschwinge> hacklu: http://www.catb.org/~esr/jargon/html/U/UTSL.html
+ <hacklu> tschwinge: thank you. braunr likes saying abbreviations which I
+ can't google.
+ <tschwinge> hacklu: Again, if this affects your work, it is fine to have a
+ look at that presumed rpctrace problem, if not, it is fine to have a look
+ at it if you'd like to, and otherwise, we'll file it as a possible bug to
+ be looked at later.
+ <tschwinge> hacklu: Now you learned that one. :-)
+ <hacklu> tschwinge: ok, this doesn't affect me now. If I have time I will
+ figure it out.
+
+ <teythoon> btw, what about the copyright assignment process?
+ <tschwinge> teythoon, hacklu: You still haven't heard from the FSF about
+ your copyright assignments? What's the latest you have heard?
+ <hacklu> tschwinge: I have written an email to ask about that, but no reply.
+ <teythoon> tschwinge: last and only response I got was on July 1st, the
+ last ping with explicit request for confirmation was on July the 12th
+ <tschwinge> hacklu: When did you send this email?
+ <hacklu> tschwinge: last week.
+ <tschwinge> teythoon: I suggest you send another inquiry, and please put me
+ in CC. And if there's no answer within a couple of days (well, I'm away
+ until Monday...), I'll follow up.
+ <tschwinge> hacklu: Likewise for you; depending on when exactly ;-) you
+ sent the last email. (Always allow for a few days until you expect an
+ answer, but if nothing happened within a week for such rather simple
+ administrative tasks, better ask again, unfortunately.)
+ <hacklu> tschwinge: ok, I will email them again
+
+ <hacklu> how to understand the async RPC?
+ <braunr> hacklu: hm ?
+ <hacklu> for instance, in [hurd]/proc/main.c the proc server loops
+ listening for messages and handles them via message_demuxer.
+ <hacklu> but when I send a request like proc_wait_request() to it, will it
+ block in the message_demuxer?
+ <hacklu> and where is the function of
+ ports_manage_port_operations_multithread()?
+ <braunr> this one is in libports
+ <braunr> it's the last thing a server calls after bootstrapping itself
+ <braunr> message_demuxer normally blocks, yes
+ <braunr> but it's not "async"
+ <hacklu> the name suggests that the proc server is listening for messages
+ with many threads?
+ <braunr> every server in the hurd does
+ <braunr> threads are created by ports_manage_port_operations_multithread
+ when incoming messages can't be processed quick enough by the set of
+ already existing threads
+ <hacklu> if too many tasks send requests to the server, will it ddos?
+ <braunr> yes
+ <teythoon> every server but /hurd/init
+ <braunr> (and /hurd/hello)
+ <braunr> hacklu: that's, in my opinion, a major design defect
+ <hacklu> yes, that is reasonable.
+ <braunr> that's what causes what i like to call thread storms on message
+ floods ... :)
+ <braunr> my hurd clone is intended to address such major issues
+ <teythoon> couldn't that be mitigated by some kind of heuristic?
+ <braunr> it already is ..
+ <hacklu> I didn't imagine that ports_manage_port_operations_multithread
+ would dynamically create threads. I thought the server would hang if all
+ worker threads were in use.
+ <braunr> that would also be a major defect
+ <braunr> creating as many threads as necessary is a good thing
+ <braunr> the problem is the dos
+ <braunr> hacklu: btw, ddos is "distributed" dos, and it doesn't really
+ apply to what can happen on the hurd
+ <hacklu> why not? as far as I know, the message transport is
+ transparent. hurd has the chance to be DDOSed
+ <braunr> we don't care about the distributed property of the dos
+ <hacklu> oh, I know what you mean.
+ <braunr> it simply doesn't matter
+ <braunr> on thread calling select in an event loop with a low timeout (high
+ frequency) on a bunch of file descriptors is already enough to generate
+ many dead-name notifications
+ <tschwinge> Oh! Based on what I've read in GDB source code, I thought the
+ proc server was single-threaded. However, it no longer is, after 1996's
+ Hurd commit fac6d9a6d59a83e96314103b3181f6f692537014.
+ <braunr> those notifications cause message flooding at servers (usually
+ pflocal/pfinet), which spawn a lot of threads to handle those messages
+ <braunr> one* thread
+ <hacklu> tschwinge: ah, the comment in gnu_nat.c is out of date!
+ <braunr> hacklu: and please, please, clean the hello_world processes you're
+ creating on darnassus
+ <braunr> i had to do it myself again :/
+ <hacklu> braunr: [hacklu@darnassus ~]$ ps ps: No applicable processes
+ <braunr> ps -eflw
+ <braunr> htop
+ <tschwinge> hacklu: Probably the proc_wait_pid and proc_waits_pending stuff
+ could be simplified then? (Not an urgent issue, of course, will file as
+ an improvement for later.)
+ <hacklu> braunr: ps -eflw |grep hacklu
+ <hacklu> 1038 12360 10746 26 26 2 87 22 148M 1.06M 97:21001 S
+ p1 0:00.00 grep --color=auto hacklu
+ <braunr> 15:08 < braunr> i had to do it myself again :/
+ <teythoon> braunr: so as a very common special case, a lot of dead name
+ notifications cause problems for pf*?
+ <braunr> and use your numeric uid
+ <braunr> teythoon: yes
+ <hacklu> braunr: I am so sorry. I only used ps to check. forgive me
+ <braunr> teythoon: simply put, a lot of messages cause problems
+ <braunr> select is one special use case
+ <teythoon> braunr: blocking other requests?
+ <braunr> the other is page cache writeback
+ <braunr> creating lots of threads
+ <braunr> potentially deadlocking on failure
+ <braunr> and in the case of writebacks, simply starving
+ <teythoon> braunr: but dead name notifications should mostly trigger
+ cleanup actions, couldn't those be handled by a different thread(pool)
+ than the rest?
+ <braunr> that's why you can bring down a hurd system with a simple cp
+ bigfile somewhere, bigfile being a few hundreds MiBs
+ <braunr> teythoon: it doesn't change the problem
+ <braunr> threads are per task
+ <braunr> and the contention would remain the same
+ <teythoon> hm
+ <braunr> since dead-name notifications are meant to release resources
+ created by what would then be "regular" threads
+ <braunr> don't worry, there is a solution
+ <braunr> it's simple
+ <braunr> it's well known
+ <braunr> it's just hard to directly apply to the hurd
+ <braunr> and impossible to enforce on mach
+ <hacklu> tschwinge: I am confused after looking into S_proc_wait()
+ [hurd/proc/wait.c]; it involves pthread_hurd_cond_wait_np, and I can't
+ find out when it will return. And the signal is reported to the debugger
+ by S_proc_wait.
+ <teythoon> braunr: a pointer please ;)
+ <braunr> teythoon: basically, synchronous ipc
+ <braunr> then, enforcing one server thread per client thread
+ <braunr> and replace mach-generated notifications with messages sent from
+ client threads
+ <braunr> the only kind of notification required by the hurd are no-senders
+ notifications
+ <braunr> this happens when a client releases all references it has to a
+ resource
+ <braunr> so it's easy to make that synchronous as well
+ <braunr> trying to design RPCs as closely as system calls on monolithic
+ kernels helps in viewing how this works
+ <braunr> the only real additions are address space crossing, and capability
+ invocation
+ <teythoon> sounds reasonable, why is it hard to apply to the hurd? most
+ rpcs are synchronous, no?
+ <braunr> mach ipc isn't
+ <hacklu> braunr: When client C send a request to server S, but doesn't wait
+ for the reply message right now, for a while, C call mach_msg to recieve
+ reply. Can I think this is a synchronous RPC?
+ <braunr> a malicious client can still overflow message queues
+ <braunr> hacklu: no
+ <teythoon> yes, I can see how this is impossible to enforce, but still we
+ could all try to play nice :)
+ <braunr> teythoon: no
+ <braunr> :)
+ <braunr> async ipc is heavy, error-prone, less performant than sync ipc
+ <braunr> some async ipc is necessary to handle asynchronous events, but
+ something like unix signals is actually a lot more appropriate
+ <braunr> we're diverging from the gsoc though
+ <braunr> don't waste too much time on that
+ <teythoon> 15:13 < braunr> it's just hard to directly apply to the hurd
+ <teythoon> I wont
+ <teythoon> why is it hard
+ <braunr> almost everything is synchronous on the hurd
+ <braunr> except a few critical bits
+ <braunr> signals :)
+ <braunr> and select
+ <braunr> and pagecache writebacks
+ <braunr> fixing those parts require some work
+ <braunr> which isn't trivial
+ <braunr> for example, select should be rewritten not to use dead-name
+ notifications
+ <teythoon> adding a light weight signalling mechanism to mach and using
+ that instead of async ipc?
+ <braunr> instead of destroying ports once an event has been received, it
+ should (synchronously) remove the requests installed at remote servers
+ <braunr> uh no
+ <braunr> well maybe but that would be even harder
+ <tschwinge> hacklu: This (proc/wait.c) is related to POSIX thread
+ cancellation -- I don't think you need to be concerned about that. That
+ function's "real" exit points are earlier above.
+ <braunr> teythoon: do you understand what i mean about select ?
+ <teythoon> ^^ is that a no go area?
+ <braunr> for now it is
+ <braunr> we don't want to change the mach interface too much
+ <teythoon> yes, I get the point about select, but I haven't looked at its
+ implementation yet
+ <hacklu> tschwinge: when I want to know the child task's state, I call
+ proc_wait_request(); unless the child's state changes, S_proc_wait()
+ will not return?
+ <braunr> it creates ports, puts them in a port set, gives servers send
+ rights so they can notify about events
+ <teythoon> y not? it's not that hurd is portable to another mach, or is it?
+ and is there another that we want to be compatible with?
+ <braunr> when an event occurs, all ports are scanned
+ <braunr> then destroyed
+ <braunr> on destruction, servers are notified by mach
+ <braunr> the problem is that the client is free to continue and make more
+ requests while existing select requests are still being cancelled
+ <teythoon> uh, yeah, that sounds like a costly way of notifying someone
+ <braunr> the cost isn't the issue
+ <braunr> select must do something like that on a multiserver system, you
+ can't do much about it
+ <braunr> but it should be synchronous, so a client can't make more requests
+ to a server until the current select call is complete
+ <braunr> and it shouldn't use a server approach at the client side
+ <braunr> client -> server should be synchronous, and server -> client
+ should be asynchronous (e.g. using a specific SIGSELECT signal like qnx
+ does)
+ <braunr> this is a very clean way to avoid deadlocks and denials of service
+ <teythoon> yes, I see
+ <braunr> qnx actually provides excellent documentation about these issues
+ <braunr> and their ipc interface is extremely simple and benefits from
+ decades of experience on the subject
+ <tschwinge> hacklu: This function implements the POSIX wait call, and per
+ »man 2 wait«: »The wait() system call suspends execution of the calling
+ process until one of its children terminates.«
+ <tschwinge> hacklu: This is implemented in glibc in sysdeps/posix/wait.c,
+ sysdeps/unix/bsd/bsd4.4/waitpid.c, sysdeps/mach/hurd/wait4.c, by invoking
+ this RPC synchronously.
+ <tschwinge> hacklu: GDB on the other hand, uses this infrastructure (as I
+ understand it) to detect (that is, to be informed) when a debuggee exits
+ (that is, when the inferior process terminates).
+ <tschwinge> hacklu: Ah, so maybe I misspoke earlier: the
+ pthread_hurd_cond_wait_np implements the blocking. And depending on its
+ return value the operation will be canceled or restarted (»start_over«).
+ <tschwinge> s%maybe%%
+ <tschwinge> hacklu: Does this information help?
+ <hacklu> tschwinge: proc_wait_request is not only to detect the inferior's
+ exit; it also detects the child's state changes
+ <braunr> as tschwinge said, it's wait(2)
+ <hacklu> tschwinge: and I have seen this: when I kill a signal to the
+ inferior, gdb will get the message id=24120 which comes from S_proc_wait
+ <hacklu> braunr: man 2 wait says: wait, waitpid, waitid - wait for process
+ to change state. (on Linux; on the Hurd there is no man wait)
+ <braunr> uh
+ <braunr> there is, it's the linux man page :)
+ <braunr> make sure you have manpages-dev installed
+ <hacklu> I always thought we were talking about Linux's manpage :/
+ <hacklu> but regardless of the manpage, gdb really calls proc_wait_request()
+ to detect whether the inferior's state changed
+ <braunr> in any case, keep in mind the hurd is intended to be a posix
+ system
+ <braunr> which means you can always refer to what wait is expected to do
+ from the posix spec
+ <braunr> see
+ http://pubs.opengroup.org/onlinepubs/9699919799/functions/wait.html
+ <hacklu> braunr: even in the manpags under hurd, man 2 wait also says: wait
+ for process to change state.
+ <braunr> yes
+ <braunr> that's what it's for
+ <braunr> what's the problem ?
+ <hacklu> the problem is that I don't understand what tschwinge said, like
+ per »man 2 wait«: »The wait() system call suspends execution of the
+ calling process until one of its children terminates.«
+ <braunr> terminating is a form of state change
+ <braunr> historically, wait was intended to monitor process termination
+ only
+ <hacklu> so the thread become stoped wait also return
+ <braunr> afterwards, process tracing was added too
+ <braunr> what ?
+ <hacklu> so when the child state become stopped, the wait() call will
+ return?
+ <braunr> yes
+ <hacklu> and I don't know this pthread_hurd_cond_wait_np.
+ <braunr> wait *blocks* until the process it references changes state
+ <braunr> pthread_hurd_cond_wait_np is the main blocking function in hurd
+ servers
+ <braunr> well, pthread_hurd_cond_timedwait_np actually
+ <braunr> all blocking functions end up there
+ <braunr> (or in mach_msg)
+ <braunr> (well pthread_hurd_cond_timedwait_np calls mach_msg too)
+ <hacklu> since I use proc_wait_request to get the state change, the
+ thread in the proc server will be blocked, not me. is that right?
+ <braunr> no
+ <braunr> both
+ <hacklu> this is just a request, why should block me?
+ <braunr> because you're waiting for the reply afterwards
+ <braunr> or at least, you should be
+ <braunr> again, i'm not familiar with those parts
+ <hacklu> after calling proc_wait_request(), gdb does a lot of stuff, and
+ then calls mach_msg to receive the reply.
+ <braunr> ok
+ <hacklu> I think it will be blocked only in mach_msg() if need.
+ <braunr> usually, xxx_request are the async send-only versions of RPCs
+ <tschwinge> Yes, that's my understanding too.
+ <braunr> and xxx_reply the async receive-only
+ <braunr> so that makes sense
+ <hacklu> so that's why I asked you whether it is an async RPC.
+ <braunr> yes
+ <braunr> 15:18 < hacklu> braunr: When client C send a request to server S,
+ but doesn't wait for the reply message right now, for a while, C call
+ mach_msg to recieve reply. Can I think this is a synchronous RPC?
+ <braunr> 15:19 < braunr> hacklu: no
+ <braunr> if it's not synchronous, it's asynchronous
+ <hacklu> sorry, I spell wrong. missing a 'a' :/
+ <tschwinge> S_proc_wait_reply will then be invoked once the procserver
+ actually answers the "blocking" proc_wait call.
+ <tschwinge> Putting "blocking" in quotes, because (due to the asynchronous
+ RPC invocation), GDB has not actually blocked on this.
+ <braunr> well, it doesn't call proc_wait
+ <hacklu> tschwinge: yes, the S_proc_wait_reply is called by
+ process_reply_server().
+ <hacklu> tschwinge: so the "blocked" one is the thread in proc_server .
+ <tschwinge> braunr: Right. »It requests the proc_wait service.«
+ <braunr> gdb will also block on mach_msg
+ <braunr> 16:05 < braunr> both
+ <hacklu> braunr: yes, if gdb doesn't call mach_msg to receive the reply it
+ will not be blocked.
+ <braunr> i expect it will always call mach_msg
+ <braunr> right ?
+ <hacklu> braunr: yes, but before it calls mach_msg, it does a lot of other
+ things; eventually, though, it will call mach_msg
+ <braunr> that's ok
+ <braunr> that's the kind of things asynchronous IPC allows
+ <hacklu> tschwinge: I have made a mistake in my weekly report. The signal
+ received by the inferior is reported by the proc server, not by
+ send_signal, because send_signal sends a SIGCHLD to gdb's msgport, not to
+ gdb itself. That makes sense.
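+
+As an aside on the libports discussion above: the main loop of a Hurd server
+is essentially one call into libports, which receives messages on every port
+in a bucket and spawns extra worker threads whenever the existing ones cannot
+keep up (the thread-creation behaviour braunr describes).  A rough sketch with
+a hypothetical bucket and demuxer -- the real thing is in [hurd]/proc/main.c,
+and the exact prototypes are in libports' <hurd/ports.h>:
+
+    #include <mach.h>
+    #include <hurd/ports.h>
+
+    static struct port_bucket *example_bucket;
+
+    /* The demuxer dispatches each incoming message to the MIG-generated
+       server routines; returning 0 means "message not handled".  */
+    static int
+    example_demuxer (mach_msg_header_t *inp, mach_msg_header_t *outp)
+    {
+      return 0;
+    }
+
+    int
+    main (void)
+    {
+      example_bucket = ports_create_bucket ();
+      /* ... create ports in the bucket, bootstrap, etc. ...  */
+
+      /* Never returns; creates additional worker threads on demand.  */
+      ports_manage_port_operations_multithread (example_bucket,
+                                                example_demuxer,
+                                                0, 0, NULL);
+      return 0;
+    }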
+
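+To illustrate the wait(2) semantics discussed above with ordinary POSIX code
+(nothing Hurd-specific in the source; on the Hurd, glibc's
+sysdeps/mach/hurd/wait4.c implements it by invoking the proc server's
+proc_wait RPC synchronously): wait* blocks until the child changes state, and
+with WUNTRACED a stop is reported too, not just termination.
+
+    #include <signal.h>
+    #include <stdio.h>
+    #include <sys/wait.h>
+    #include <unistd.h>
+
+    int
+    main (void)
+    {
+      pid_t pid = fork ();
+      if (pid == 0)
+        {
+          raise (SIGSTOP);          /* the child stops itself */
+          _exit (0);
+        }
+
+      int status;
+      /* Blocks until the child changes state; WUNTRACED reports stops.  */
+      if (waitpid (pid, &status, WUNTRACED) == pid && WIFSTOPPED (status))
+        printf ("child stopped by signal %d\n", WSTOPSIG (status));
+
+      kill (pid, SIGCONT);
+      waitpid (pid, &status, 0);    /* reap the child once it exits */
+      return 0;
+    }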
+
+# IRC, freenode, #hurd, 2013-07-30
+
+ <hacklu> braunr: before I went to sleep last night, this question popped
+ into my mind: how did you find out that my hello_world was still alive on
+ darnassus? The process is not a CPU-heavy or IO-heavy guy; you would not
+ feel any performance penalty. I am so curious :)
+ <teythoon> hacklu: have you looked into patching the proc server to allow
+ reparenting of processes?
+ <hacklu> teythoon:not yet
+ <teythoon> hacklu: i've familiarized myself with proc in the last week,
+ this should get you started nicely: http://paste.debian.net/19985/
+ diff --git a/proc/mgt.c b/proc/mgt.c
+ index 7af9c1a..a11b406 100644
+ --- a/proc/mgt.c
+ +++ b/proc/mgt.c
+ @@ -159,9 +159,12 @@ S_proc_child (struct proc *parentp,
+ if (!childp)
+ return ESRCH;
+
+ + /* XXX */
+ if (childp->p_parentset)
+ return EBUSY;
+
+ + /* XXX if we are reparenting, check permissions. */
+ +
+ mach_port_deallocate (mach_task_self (), childt);
+
+ /* Process identification.
+ @@ -176,6 +179,7 @@ S_proc_child (struct proc *parentp,
+ childp->p_owner = parentp->p_owner;
+ childp->p_noowner = parentp->p_noowner;
+
+ + /* XXX maybe need to fix refcounts if we are reparenting, not sure */
+ ids_rele (childp->p_id);
+ ids_ref (parentp->p_id);
+ childp->p_id = parentp->p_id;
+ @@ -183,11 +187,14 @@ S_proc_child (struct proc *parentp,
+ /* Process hierarchy. Remove from our current location
+ and place us under our new parent. Sanity check to make sure
+ parent is currently init. */
+ - assert (childp->p_parent == startup_proc);
+ + assert (childp->p_parent == startup_proc); /* XXX */
+ if (childp->p_sib)
+ childp->p_sib->p_prevsib = childp->p_prevsib;
+ *childp->p_prevsib = childp->p_sib;
+
+ + /* XXX we probably want to keep a reference to the old
+ + childp->p_parent around so that if the debugger dies or detaches,
+ + we can reparent the process to the old parent again */
+ childp->p_parent = parentp;
+ childp->p_sib = parentp->p_ochild;
+ childp->p_prevsib = &parentp->p_ochild;
+ <teythoon> the code doing the reparenting is already there, but for now it
+ is only allowed to happen once at process creation time
+ <hacklu> teythoon: good job. This is on my todo list; when I implement the
+ attach feature for gdbserver I will need this
+ <braunr> hacklu: i use htop
+ <teythoon> braunr: why is that process so disruptive?
+ <braunr> the big problem with those stale processes is that they're in a
+ state that prevents one important script to complete
+ <braunr> there is a bug on the hurd with regard to terminals
+ <braunr> when you log out of an ssh session, the terminal remains open for
+ some reason (bad reference counting somewhere, but it's quite tricky to
+ identify)
+ <braunr> to work around the issue, i have a cron job that calls a script to
+ kill unused terminals
+ <braunr> this works by listing processes
+ <braunr> your hello_world processes block that listing
+ <teythoon> uh, how so?
+ <hacklu> braunr: ok. I know.
+ <braunr> teythoon: probably the denial of service we were talking about
+ yesterday
+ <teythoon> select flooding a server?
+ <braunr> no, a program refusing to answer on its msg port
+ <braunr> ps has an option -M :
+ <braunr> -M, --no-msg-port Don't show info that uses a process's
+ msg port
+ <braunr> the problem is that my script requires those info
+ <teythoon> ah, I see, right
+ <braunr> hacklu being working on gdb, it's not surprising he's messing with
+ that
+ <teythoon> yes indeed. couldn't ps use a timeout to detect that?
+ <hacklu> braunr: yes, I once found that ps hangs when I have run
+ hello_world into a breakpoint state.
+ <teythoon> braunr: thanks for explaining the issue, i always wondered why
+ that process is such a big deal ;)
+ <braunr> teythoon: how do you tell between processes being slow to answer
+ and intentionally refusing to answer ?
+ <braunr> a timeout is almost never the right solution
+ <braunr> sometimes it's the only solution though, like for networking
+ <braunr> but on a system running on a local machine, there is usually
+ another way
+ <teythoon> braunr: I don't of course
+ <braunr> ?
+ <braunr> ah ok
+ <braunr> it was rhetorical :)
+ <teythoon> yes I know, and I was implying that I wasn't expecting a timeout
+ to be the clean solution
+ <teythoon> and the current behaviour is hardly acceptable
+ <braunr> i agree
+ <braunr> it's ok for interactive cases
+ <braunr> you can use Ctrl-C, which uses a 3 seconds delay to interrupt the
+ client RPC if nothing happens
+ <teythoon> braunr: btw, what about *_reply.defs? Should I add a
+ corresponding reply simpleroutine if I add a routine?
+ <braunr> normally yes
+ <braunr> right, forgot about that
+ <teythoon> so that the procedure ids are kept in sync in case one wants to
+ do this async at some point in the future?
+ <braunr> yes
+ <braunr> this happened with select
+ <braunr> i had to fix the io interface
+ <teythoon> ok, noted
+
+
+# IRC, freenode, #hurd, 2013-07-31
+
+ <hacklu> Do we need to write any other report for the mid-term evaluation?
+ I have only submitted a question-and-answer form to Google.
+
+
+# IRC, freenode, #hurd, 2013-08-05
+
+ <hacklu> hi, this is my weekly
+ report. http://hacklu.com/blog/gsoc-weekly-report7build-gdbserver-on-gnuhurd-164/
+ <hacklu> youpi: can you show me some suggestions about how to design the
+ interface and structure of gdbserver?
+ <youpi> hacklu: well, I've read your blog entry, I was wondering about
+ tschwinge's opinion, that's why I asked whether he was here
+ <youpi> I would tend to start from an existing gdbserver, but as I haven't
+ seen the code at all, I don't know how much that can help
+ <hacklu> so you mean I should get a working gdbserver and then improve it?
+ <youpi> I'd say so, but again it's not a very strong opinion
+ <youpi> I'd rather let tschwinge comment on this
+ <hacklu> youpi: ok :)
+
+ <youpi> how about the copyright assignments? did hacklu or teythoon receive
+ any answer?
+ <teythoon> youpi: I did, the copyright clerk told me that he finally got my
+ papers and that everything is in order now
+ <youpi> few!
+ <youpi> s/f/ph
+ <youpi> teythoon: you mean all steps are supposed to be done now, or is he
+ doing the last steps? I don't see your name in the copyright folder yet
+ <teythoon> youpi: well, he said that he had the papers and they are about
+ to be signed
+ <youpi> teythoon: ok, so it's not finished, that's why your name is not on
+ the list yet
+ <youpi> this paper stuff is really a pain
+ <hacklu> youpi: I haven't got any answer from FSF now.
+ <youpi> did you ping them recently?
+ <hacklu> I pinged them 2 weeks ago.
+ <hacklu> what do you mean by ping? I just wrote an email to him. Is it
+ enough?
+ <youpi> yes
+
+
+# IRC, freenode, #hurd, 2013-08-12
+
+ <hacklu> hi, this is my weekly report
+ http://hacklu.com/blog/gsoc-weekly-report8-168/ . sorry for so late.
+
+ <youpi> hacklu: it seems we misunderstood each other last week, I meant to
+ start from the existing gdbserver implementation
+ <youpi> but never mind :)
+ <youpi> starting from the lynxos version was a good idea
+ <hacklu> youpi: em... yeah, the lynxos port is so clean and simple.
+
+ <hacklu> youpi: aha, the "Remote connection closed" problem has been fixed
+ after I added an init_registers_i386() call and set the target_desc
+ structure.
+ <hacklu> but I still don't understand the target_desc structure. I only
+ know it is auto-generated, configured by configure.srv.
+ <tschwinge> Hi!
+ <tschwinge> hacklu: In gdbserver, you should definitely re-use existing
+ infrastructure, especially anything that deals with the
+ protocol/communication with GDB (that is, server.c and its support
+ files).
+ <tschwinge> hacklu: Then, for the x86 GNU Hurd port, it should be
+ implemented in the same way as an existing port. The Linux port is the
+ obvious choice, of course, but it is also fine to begin with something
+ simpler (like the LynxOS port you've chosen), and then we can still add
+ more features later on. That is a very good approach actually.
+ <tschwinge> hacklu: The x86 GNU Hurd support will basically consist of
+ three pieces -- exactly as with GDB's native x86 GNU Hurd port: x86
+ processor specific (tge existing gdbserver/i386-low.c etc. -- shouldn't
+ need any modifications (hopefully)), GNU Hurd specific
+ (gdbserver/gnu-hurd-low.c (or similar)), and x86 GNU Hurd specific
+ (gdbserver/gnu-hurd-x86-low.c (or similar)).
+ <tschwinge> s%tge%the
+ <hacklu> tschwinge: for now I have only added a file named gnu-low.c; I
+ should move some parts to a gnu-i386-low.c file, I think.
+ <tschwinge> hacklu: That's fine for the moment. We can move the parts
+ later (everything with 86 in its name, probably).
+ <hacklu> that's ok.
+ <hacklu> tschwinge: Can I copy code from gnu-nat.c to
+ gdbserver/gnu-hurd-low.c? I think the two file will have many same code.
+ <tschwinge> hacklu: That's correct. Ideally, the code should be shared
+ (for example, in a file in common/), but that's an ongoing discussion in
+ GDB, for other duplicated code. So, for the moment, it is fine to copy
+ the parts you need.
+ <tschwinge> hacklu: Oh, but it may be a good idea to add a comment to the
+ source code, where it is copied from.
+ <hacklu> maybe I can do a common-part just for hurd gdb port.
+ <tschwinge> That should make it easier later on, to consolidate the
+ duplicated code into one place.
+ <tschwinge> Or you can do that, of course. If it's not too difficult to
+ do?
+ <hacklu> I think at the beginning it is not difficult. But as the
+ gdbserver code grows, the difference from gdb grows too. That would be
+ too many #if/#else blocks.
+ <tschwinge> I think we should check with the GDB maintainers, what they
+ suggest.
+ <tschwinge> hacklu: Please send an email To: <gdb@sourceware.org> Cc:
+ <lgustavo@codesourcery.com>, <thomas@codesourcery.com>, and ask about
+ this: you need to duplicate code that already exists in gnu-nat.c for new
+ gdbserver port -- how to share code?
+ <hacklu> tschwinge: ok, I will send the email right now.
+ <hacklu> tschwinge: do I need to CC the Hurd mailing list?
+ <tschwinge> hacklu: Not really for that question, because that is a
+ question only relevant to the GDB source code itself.
+ <hacklu> tschwinge: got it.
+
+[[!message-id
+"CAB8fV=jzv_rPHP3-HQVBA-pCNZNat6PNbh+OJEU7tZgQdKX3+w@mail.gmail.com"]].
+
+
+# IRC, freenode, #hurd, 2013-08-19
+
+<http://hacklu.com/blog/gsoc-weekly-report9-172/>.
+
+ <hacklu__> when and where is the best time and place to get the register
+ values in gdb?
+ <youpi> well, I'm not sure to understand the question
+ <youpi> you mean in the gdb source code, right?
+ <youpi> isn't it already done in gdb?
+ <youpi> probably similarly to i386?
+ <youpi> (linux i386 I mean)
+ <hacklu__> I can't find fetch_register or related functions implemented in
+ gnu-nat.c
+ <hacklu__> so I can't decide how to implement this in gdbserver.
+ <youpi> it's in i386gnu-nat.c, isn't it?
+ <hacklu__> yeah.
+ <youpi> does that answer your issue?
+ <hacklu__> thank you. I am so stupid
+
+
+# IRC, freenode, #hurd, 2013-08-26
+
+ < hacklu> hello everyone, this is my week
+ report. http://hacklu.com/blog/gsoc-weekly-report10-174/
+
+ < hacklu> btw, my FSF copyright assignment has been accepted. The guy
+ said they had received my mail a while ago but forgot to handle it.
+
+ < hacklu> but now I face a new problem: when I type the first continue
+ command, gdb continues past all the breakpoints, and the inferior runs
+ until it exits normally.
+
+
+# IRC, freenode, #hurd, 2013-08-30
+
+ <hacklu> tschwinge: hi, does gdb's attach feature work correctly on the
+ Hurd?
+ <hacklu> on my hurd-box, gdb can't attach to a running process; after
+ attaching, when I continue, gdb complained "can't find pid 12345"
+ <teythoon> hacklu: attaching works, not sure why gdb is complaining
+ <hacklu> teythoon: yeah, it can attach, but it can't continue the process.
+ <hacklu> in this case, the debugger is useless if it can't resume execution
+ <teythoon> hacklu: well, gdb on Linux reacts a little differently, but for
+ me attaching and then resuming works
+ <hacklu> teythoon: yes, gdb on linux works well.
+ <teythoon> % gdb --pid 21506 /bin/sleep
+ <teythoon> [...]
+ <teythoon> (gdb) c
+ <teythoon> Continuing.
+ <teythoon> warning: Can't wait for pid 21506: No child processes
+ <teythoon> # pkill -SIGILL sleep
+ <teythoon> warning: Pid 21506 died with unknown exit status, using SIGKILL.
+ <hacklu> yes. I used a sleep program to test too.
+ <teythoon> I believe that the warning and deficiencies with the signal
+ handling are b/c on Hurd the debuggee cannot be reparented to the
+ debugger
+ <hacklu> oh, I remembered, I have asked this before.
+ <tschwinge> Confirming that attaching to a process in __sleep -> __mach_msg
+ -> mach_msg_trap works fine, but then after »continue«, I see »warning:
+ Can't wait for pid 4038: No child processes« and three times »Can't fetch
+ registers from thread bogus thread id 1: No such thread« and the sleep
+ process exits (normally, I guess? -- interrupted "system call").
+ <tschwinge> If detaching (exit GDB) instead, I see »warning: Can't modify
+ tracing state for pid 4041: No such process« and the sleep process exits.
+ <tschwinge> Attaching to and then issuing »continue« in a process that is
+ not currently in a mach_msg_trap (tested a trivial »while (1);«) seems to
+ work.
+ <tschwinge> hacklu: ^
+ <hacklu> tschwinge: on my hurdbox, if I just attach to a while(1), the
+ system nearly goes down; nothing can happen. Maybe my hardware is slow.
+ <hacklu> so I can only test on the sleep one.
+ <hacklu> my gdbserver doesn't support the attach feature now; the other
+ basic features have been implemented. I am testing and reviewing the code
+ now.
+ <tschwinge> Great! :-)
+ <tschwinge> It is fine if attaching does not work currently -- can be added
+ later.
+ <hacklu> btw, How can I submit my code? put the patch in email directly?
+ <tschwinge> Did you already run the GDB testsuite using your gdbserver?
+ <hacklu> no, haven't yet
+ <tschwinge> Either that, or a Git branch to pull from.
+ <hacklu> I think I should do more review and testing before I submit
+ patches.
+ <tschwinge> hacklu: See [GDB]/gdb/testsuite/boards/native-gdbserver.exp
+ (and similar files) for how to run the GDB testsuite with gdbserver.
+ <hacklu> ok.
+ <tschwinge> But don't be disappointed if there are still a lot of failures,
+ etc. It'll already be great if some basic stuff works.
+ <hacklu> now it can set and remove breakpoint. show register, access
+ variables.
+ <tschwinge> ... which already is enough for a lot of debugging sessions.
+ :-)
+ <hacklu> I will continue to make it more powerful.
+ <hacklu> :)
+ <tschwinge> Yes, but please first work on polishing the existing code, and
+ get it integrated upstream. That will be a great milestone.
+ <tschwinge> No doubt that GDB maintainers will have lots of comments about
+ proper formatting of the source code, and such things. Trivial, but will
+ take time to re-work and get right.
+ <hacklu> oh, I got it. I will give my patch before this weekend.
+ <tschwinge> Then once your basic gdbserver is included, you can continue to
+ implement additional features, piece by piece.
+ <tschwinge> And then we can run the GDB testsuite with gdbserver and
+ compare that there are no regressions, etc.
+ <tschwinge> Heh, »before the weekend« -- that's soon. ;-)
+ <hacklu> honestly speaking, most of the code is copied from other files; I
+ haven't written too much code myself.
+ <tschwinge> Good -- this is what I hoped. Often, more time in software
+ development is spent on integrating existing things rather than writing
+ new code.
+ <hacklu> but I have spent a lot of time getting to know the code and
+ debugging it to get it to work.
+ <tschwinge> This is normal, and is good in fact: existing code has already
+ been tested and documented (in theory, at least...).
+ <tschwinge> Yes, that's expected too: when relying on/reusing existing
+ code, you first have to understand it, or at least its interfaces. Doing
+ that, you're sort of "mentally writing the existing code again".
+ <tschwinge> So, this sounds all fine. :-)
+ <hacklu> your words make me happy.
+ <hacklu> :)
+ <tschwinge> Well, I am, because this seems to be going well.
+ <hacklu> thank you. I am going to coding now~~
+
+
+# IRC, freenode, #hurd, 2013-09-02
+
+ <hacklu> hi, this is my weekly
+ report. http://hacklu.com/blog/gsoc-weekly-report11-181/
+
+ <hacklu> please give me any advice on how to use mig to generate stub-files
+ in gdbserver?
+ <braunr> hacklu:
+ http://darnassus.sceen.net/gitweb/rbraun/slabinfo.git/blob/HEAD:/Makefile
+ <hacklu> braunr: shouldn't I work like this
+ https://github.com/hacklu/gdbserver/blob/gdbserver/gdb/config/i386/i386gnu.mh
+ ?
+ <braunr> hacklu: seems that you need server code
+ <braunr> other than that i don't see the difference
+ <hacklu> gdb uses autoconf to generate the Makefile, partly from the *.mh
+ file, but in gdbserver there are no .mh-like files.
+ <braunr> hacklu: why can't you reuse /i386gnu.mh ?
+ <hacklu> braunr: the question is that there are some things in
+ /i386gnu.mh that are not needed.
+ <braunr> hacklu: like what ?
+ <hacklu> braunr: like fork-child.o msg_U.o core-regset.o
+ <braunr> hacklu: well, adjust the dependencies as you need
+ <braunr> hacklu: do you mean they become useless for gdbserver but are
+ useful for gdb ?
+ <hacklu> braunr: yes, so I need another gnu.mh file.
+ <hacklu> braunr: but gdbserver's configure doesn't have any *.mh file;
+ can I add the first one?
+ <braunr> or adjust the values of those variables depending on the building
+ mode
+ <braunr> maybe
+ <braunr> tschwinge is likely to better answer those questions
+ <hacklu> braunr: ok, I will wait for tschwinge's advice.
+ <luisgpm> hacklu, The gdb/config/ dir is for files related to the native
+ gdb builds, as opposed to a cross gdb that does not have any native bits
+ in it. In the latter, gdbserver will be used to touch the native layer,
+ and GDB will only guide gdbserver through the debugging session...
+ <luisgpm> hacklu, In case you haven't figured that out already.
+ <hacklu> luisgpm: I am not quite clear on what you mean. According to your
+ words, I shouldn't use gdb/config for gdbserver?
+ <luisgpm> hacklu, Correct. You should use configure.srv for gdbserver.
+ <luisgpm> hacklu, gdb/gdbserver/configure.srv that is.
+ <luisgpm> hacklu, gdb/configure.tgt for non-native gdb files...
+ <luisgpm> hacklu, and gdb/config for native gdb files.
+ <luisgpm> hacklu, The native/non-native separation for gdb is due to the
+ possibility of having a cross gdb.
+ <congzhang> what's srv file purpose?
+ <luisgpm> hacklu, gdbserver, on the other hand, is always native.
+ <luisgpm> Doing the target-to-object-files mapping.
+ <hacklu> how can I use configure.srv to configure MIG to generate
+ stub-files?
+ <luisgpm> What are stub-files in this context?
+ <hacklu> On Hurd, some RPC stub files are auto-generated by MIG from *.defs
+ files
+ <braunr> luisgpm: c source code handling low level ipc stuff
+ <braunr> mig is the mach interface generator
+ <tschwinge> luisgpm, hacklu: If that is still helpful by now, in
+ <http://news.gmane.org/find-root.php?message_id=%3C87ppwqlgot.fsf%40kepler.schwinge.homeip.net%3E>
+ I described the MIG usage in GDB. (Which also states that ptrace is a
+ system call which it is not.)
+ <tschwinge> hacklu: For the moment, it is fine to indeed copy the rules
+ related to MIG/RPC stubs from gdb/config/i386/i386gnu.mh to a (possibly
+ new) file in gdbserver. Then, later, we should work out how to properly
+ share these, as with all the other code that is currently duplicated for
+ GDB proper and gdbserver.
+ <luisgpm> hacklu, tschwinge: If there is code gdbserver and native gdb can
+ use, feel free to put them inside gdb/common for now.
+ <tschwinge> hacklu, luisgpm: Right, that was the conclusion from
+ <http://news.gmane.org/find-root.php?message_id=%3CCAB8fV%3Djzv_rPHP3-HQVBA-pCNZNat6PNbh%2BOJEU7tZgQdKX3%2Bw%40mail.gmail.com%3E>.
+ <hacklu> tschwinge, luisgpm : ok, I got it.
+ <hacklu> tschwinge: sorry for not having submitted patches yet, I will try
+ to submit my patch tomorrow.
+
+[[!message-id "CAB8fV=iw783uGF8sWyqJNcWR0j_jaY5XO+FR3TyPatMGJ8Fdjw@mail.gmail.com"]].
+
+
+# IRC, freenode, #hurd, 2013-09-06
+
+ <hacklu> If I want to compile a file which is not in the current directory,
+ how should I change the Makefile? I have tried obj: ../foo.c, but
+ foo.o ends up in ../, not in the current directory.
+ <hacklu> That is to say, when I build gdbserver, I want to use
+ [gdb]/gdb/gnu-nat.c. How can I get gnu-nat.o under gdbserver's directory?
+ <hacklu> tschwinge: ^^
+ <tschwinge> Hi!
+ <tschwinge> hacklu: Heh, unexpected problem.
+ <tschwinge> hacklu: How is this handled for the files that are already in
+ gdb/common/? I think these would have the very same problem?
+ <hacklu> tschwinge: ah.
+ <hacklu> I got it
+ <tschwinge> I see, for example:
+ <tschwinge> ./gdb/Makefile.in:linux-btrace.o:
+ ${srcdir}/common/linux-btrace.c
+ <tschwinge> ./gdb/gdbserver/Makefile.in:linux-btrace.o:
+ ../common/linux-btrace.c $(linux_btrace_h) $(server_h)
+ <hacklu> If I had asked before, I wouldn't have used a soft link to solve
+ this.
+ <tschwinge> But isn't that what you've been trying?
+ <hacklu> with this, where does the .o file go?
+ <tschwinge> Yes, symlinks can't be used, because they're not available on
+ every (file) system GDB can be built on.
+ <tschwinge> I would assume the .o files to go into the current working
+ directory.
+ <tschwinge> Wonder why this didn't work for you.
+ <hacklu> in gdbserver/configure.srv, there is a srv_tgtobj="gnu_nat.c ..";
+ if I change the Makefile.in directly, that isn't gdb's way of doing it.
+ <hacklu> So I can't use the variable srv_tgtobj?
+ <tschwinge> That should be srv_tgtobj="gnu_nat.o [...]"? (Not .c.)
+ <hacklu> I have tried this, srv_tgtobj="../gnu_nat.c", and then gnu_nat.o
+ is generated in the parent directory.
+ <hacklu> s/.c/.o
+ <hacklu> (wrong input)
+ <hacklu> For my understand now, I should set the srv_tgtobj="", and then
+ set the gnu_nat.o:../gnu_nat.c in the gdbserver/Makefile.in. right?
+ <tschwinge> Hmm, I thought you'd need both.
+ <tschwinge> Have you tried that?
+ <hacklu> no, haven't yet. I will try soon.
+ <hacklu> I have met a strange thing. I have this in the Makefile:
+ i386gnu-nat.o:../i386gnu-nat.c $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $<
+ <hacklu> When I run make, it complains: no rule to make target
+ i386gnu-nat.c
+ <hacklu> but I also have a line gnu-nat.o:../gnu-nat.c ../gnu-nat.h, and
+ this works well.
+ <tschwinge> hacklu: Does it work if you use $(srcdir)/../i386gnu-nat.c
+ instead of ../i386gnu-nat.c?
+ <tschwinge> Or similar.
+ <hacklu> I have tried this: i386gnu-nat.c: echo "" ; then it works.
+ <hacklu> (trying $(srcdir) now...)
+ <hacklu> make: *** No rule to make target `.../i386gnu-nat.c', needed by
+ `i386gnu-nat.o'. Stop.
+ <hacklu> seems to be of no use.
+ <hacklu> tschwinge: I have found another thing: if I rename
+ i386gnu-nat.o to something else, like i386gnu-nat2.o, it works!
+
+
+# IRC, freenode, #hurd, 2013-09-07
+
+ <hacklu> hi, I have found many '^L' in gnu-nat.c, should I fix them or keep
+ them as they are?
+ <LarstiQ> hacklu: fix in what sense?
+ <hacklu> remove the lines containing ^L
+ <LarstiQ> hacklu: see bottom of
+ http://www.gnu.org/prep/standards/standards.html#Formatting
+ <LarstiQ> hacklu: "Please use formfeed characters (control-L) to divide the
+ program into pages at logical places (but not within a function)."
+ <LarstiQ> hacklu: so unless a reason has come up to deviate from the gnu
+ coding standards, those ^L's are there by design
+ <hacklu> LarstiQ: Thank you! I always thought those were some formatting
+ error. I am stupid.
+ <LarstiQ> hacklu: not stupid, you just weren't aware
+ * LarstiQ thought the same when he first encountered them
+
+
+# IRC, freenode, #hurd, 2013-09-09
+
+ <youpi> hacklu_, hacklu__: I don't know what tschwinge thinks, but I guess
+ you should work with upstream on integration of your existing work, this
+ is part of the gsoc goal: submitting one's stuff to projects
+ <tschwinge> youpi: Which is what we're doing (see the patches recently
+ posted). :-)
+ <youpi> ok
+ <hacklu__> youpi: I have always been doing what you suggested. :)
+ <hacklu> I have asked in my new mail, and I want to ask here again: should
+ I change gdb to use the lwp field instead of the tid field? There are
+ too many functions that use tid, like
+ make_proc(), inf_tid_to_thread(), ptid_build(), and there is a field
+ named tid in the structure proc also.
+ <hacklu> (sorry for the bad \n )
+ <hacklu> and this is my weekly
+ report. http://hacklu.com/blog/gsoc-weekly-report12-186/
+ <hacklu> And in Pedro Alves's reply, he wants me to integrate only one
+ back-end for gdb and gdbserver, but the struct target_ops is declared
+ differently in the two. How can I integrate this? Or have I misunderstood?
+ <hacklu> tschwinge: ^^
+ <tschwinge> hacklu: I will take this to email, so that Pedro et al. can
+ comment, too.
+ <tschwinge> hacklu: I'm not sure about your struct target_ops question.
+ Can you reply to Pedro's email to ask about this?
+ <hacklu> tschwinge: ok.
+ <tschwinge> hacklu: I have sent an email about the LWP/TID question.
+ <hacklu> tschwinge: Thanks for your email, now I know how to fix the
+ LWP/TID for this moment.
+ <tschwinge> hacklu: Let's hope that Pedro also is fine with this. :-)
+ <hacklu> tschwinge: BTW, I have a question: if we just use a locally
+ auto-generated number to distinguish threads in a process, how can we do
+ that?
+ <hacklu> How can we know which thread threw the exception?
+ <hacklu> I haven't thought about this before.
+ <tschwinge> hacklu: make_proc sets up a mapping from Mach threads to GDB's
+ TIDs. And then, for example inf_tid_to_thread is used to look that up.
+ <hacklu> tschwinge: oh, yeah. that is.
+
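+A toy sketch of the kind of mapping described just above: hand out a small,
+locally generated TID the first time a Mach thread port is seen, and look the
+port up in the same table when an exception arrives to know which thread it
+came from.  The data structure here is purely illustrative and not GDB's
+actual one (GDB keeps this state in its proc structures, set up by make_proc
+and queried via inf_tid_to_thread):
+
+    #include <mach.h>
+    #include <stdlib.h>
+
+    struct tid_map_entry
+    {
+      thread_t port;                /* Mach thread (send right) */
+      int tid;                      /* locally generated id */
+      struct tid_map_entry *next;
+    };
+
+    static struct tid_map_entry *tid_map;
+    static int next_tid = 1;
+
+    /* Return the TID for PORT, assigning a fresh one on first sight.  */
+    static int
+    thread_to_tid (thread_t port)
+    {
+      struct tid_map_entry *e;
+
+      for (e = tid_map; e != NULL; e = e->next)
+        if (e->port == port)
+          return e->tid;
+
+      e = malloc (sizeof *e);
+      if (e == NULL)
+        abort ();
+      e->port = port;
+      e->tid = next_tid++;
+      e->next = tid_map;
+      tid_map = e;
+      return e->tid;
+    }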
+
+# IRC, freenode, #hurd, 2013-09-16
+
+ <tschwinge> hacklu: Even when waiting for Pedro (and me) to comment, I
+ guess you're not out of work, but can continue in parallel with other
+ things, or improve the patch?
+ <hacklu> tschwinge: honestly speaking, these days I have been out of work
+ T_T after updating the patch.
+ <hacklu> I am not sure how to improve the patch beyond your comments in the
+ email. I have just run some test cases and nothing else.
+ <tschwinge> hacklu: I have not yet seen any report on the GDB testsuite
+ results using your gdbserver port (see
+ gdb/testsuite/boards/native-gdbserver.exp). :-D
+ <hacklu> the question is, the result of that test run is just how many pass
+ and how many don't pass,
+ <hacklu> and I am not sure whether I need to give this information.
+ <tschwinge> Just as a native run of GDB's testsuite, this will create *.sum
+ and *.log files, and these you can diff to those of a native run of GDB's
+ testsuite.
+ <hacklu> https://paste.debian.net/41066/ this is my result
+ === gdb Summary ===
+
+ # of expected passes 15573
+ # of unexpected failures 609
+ # of unexpected successes 1
+ # of expected failures 31
+ # of known failures 57
+ # of unresolved testcases 6
+ # of untested testcases 47
+ # of unsupported tests 189
+ /home/hacklu/code/gdb/gdb/testsuite/../../gdb/gdb version 7.6.50.20130619-cvs -nw -nx -data-directory /home/hacklu/code/gdb/gdb/testsuite/../data-directory
+
+ make[3]: *** [check-single] Error 1
+ make[3]: Leaving directory `/home/hacklu/code/gdb/gdb/testsuite'
+ make[2]: *** [check] Error 2
+ make[2]: Leaving directory `/home/hacklu/code/gdb/gdb'
+ make[1]: *** [check-gdb] Error 2
+ make[1]: Leaving directory `/home/hacklu/code/gdb'
+ make: *** [do-check] Error 2
+ <hacklu> I got a make error, so I didn't get the *.sum and *.log files.
+ <tschwinge> Well, that should be fixed then?
+ <tschwinge> hacklu: When does university start again for you?
+ <hacklu> My university started a week ago.
+ <hacklu> but I will fix this,
+ <tschwinge> Oh, OK. So you won't have too much time anymore for GDB/Hurd
+ work?
+ <hacklu> it is my duty to finish my work.
+ <hacklu> time is not the main problem for me, I will schedule it for
+ myself.
+ <tschwinge> hacklu: Thanks! Of course, we'd be very happy if you stay with
+ us, and continue working on this project (or another one)! :-D
+ <hacklu> I also thank all of you who helped me and mentored me to improve
+ myself.
+ <hacklu> then, the next thing I can do is fix the failing test cases?
+ <tschwinge> hacklu: It's been our pleasure!
+ <tschwinge> hacklu: A comparison of the GDB testsuite results for a native
+ and gdbserver run would be good to get an understanding of the current
+ status.
+ <hacklu> ok, I will give this comparison soon. BTW, should I compare the
+ native gdb result with the one from before my patch?
+ <tschwinge> You mean compare the native run before and after your patch?
+ Yes, that also wouldn't hurt to do, to show that your patch doesn't
+ introduce any regressions to the native GDB port.
+ <hacklu> ok, besides this I should compare the native gdb with gdbserver?
+ <tschwinge> Yes.
+ <hacklu> besides this, what more can I do?
+ <tschwinge> No doubt, there will be differences between the native and
+ gdbserver test runs -- the goal is to reduce these. (This will probably
+ translate to: implement more stuff for the Hurd port of gdbserver.)
+ <hacklu> ok, I know it. Start it now
+ <tschwinge> As time permits. :-)
+ <hacklu> It's ok. :)
+
+
+# IRC, freenode, #hurd, 2013-09-23
+
+ <hacklu_> I have to go out in a few minutes, will be back at 8pm. I am
+ sorry to miss the meeting this week, I will finish my report soon.
+ <hacklu_> tschwinge, youpi ^^
diff --git a/community/gsoc/2013/nlightnfotis.mdwn b/community/gsoc/2013/nlightnfotis.mdwn
new file mode 100644
index 00000000..a9176f51
--- /dev/null
+++ b/community/gsoc/2013/nlightnfotis.mdwn
@@ -0,0 +1,3037 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!toc]]
+
+
+# IRC, freenode, #hurd, 2013-06-29
+
+ <teythoon> so, how is your golang port going?
+ <nlightnfotis> I just started working on it. I had been reading
+ documentation so far. Maybe over reading as people told me when I asked
+ for their feedback
+ <nlightnfotis> but I will report on what I have done (technically
+ tomorrow), and post it to the mailing list too.
+
+ <nlightnfotis> Hey guys, what could possibly cause the following error
+ message when executing a program in the Hurd? "./dumper: Could not open
+ note: (system server) error with unknown subsystem"
+ <nlightnfotis> My program is one that opens a file and dumps it into stdout
+ <nlightnfotis> pinotree: the code I am using is the one present here
+ http://www.gnu.org/software/hurd/hacking-guide/hhg.html under paragraph
+ 6.1
+ <nlightnfotis> I investigated it a bit but can not find a lead. I seem to
+ have all the rights to open the file that I want to dump to stdout
+ <pinotree> what if you reset errno to 0 just after all the declarations in
+ main, before the instructions?
+ <nlightnfotis> will check this out and get back to you.
+ <pinotree> sure :)
+ <nlightnfotis> pinotree: Now it suggests that it can't get the number of
+ readable files, which the source suggests is normal behavior.
+ Thanks for your assistance.
+
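+A small illustration of the errno pitfall pinotree points at above: errno is
+only meaningful right after a call has actually failed, so either reset it
+before use or, better, test the call's return value first.  The file name
+below is just a placeholder:
+
+    #include <errno.h>
+    #include <stdio.h>
+    #include <string.h>
+
+    int
+    main (void)
+    {
+      errno = 0;
+      FILE *f = fopen ("/etc/hostname", "r");
+      if (f == NULL)
+        {
+          /* Only now does errno describe a real failure.  */
+          fprintf (stderr, "fopen: %s\n", strerror (errno));
+          return 1;
+        }
+
+      char buf[256];
+      size_t n;
+      while ((n = fread (buf, 1, sizeof buf, f)) > 0)
+        fwrite (buf, 1, n, stdout);
+
+      fclose (f);
+      return 0;
+    }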
+
+# IRC, freenode, #hurd, 2013-07-01
+
+ <nlightnfotis> youpi: from my part I can report that I have started working
+ with the code, and doing as Thomas suggested. I was about to write my
+ report yesterday, but I am facing some build errors on the HURD, which I
+ would like to investigate further before I write my report.
+ <nlightnfotis> that's why I decided to write it later in the day.
+ <youpi> I don't think you have to wait
+ <youpi> you can simply write in your report that you are having build
+ errors
+ <nlightnfotis> ok. I will have it written and delivered later in the day.
+ <nlightnfotis> braunr: that's cool. I think my reading has paid for
+ itself. And you may be pleased to know that I have gotten my hands dirty
+ with the code. I was about to write report yesterday, but some build
+ errors with the gcc (that I am investigating atm) are holding me
+ off. Will have that written later in the day.
+ <braunr> don't hesitate to ask help about build errors
+ <braunr> don't wait too much
+ <braunr> you need to progress on what matters, and not be blocked by
+ secondary problems
+ <nlightnfotis> I will see myself asking for help sooner rather than later,
+ but I would like to investigate it myself, and attempt to solve the
+ issues that occur to me before resorting to bugging you guys.
+ <braunr> sure
+ <braunr> just not too long
+ <braunr> too long being a day or so
+ <nlightnfotis> these were my build_results on the hurd
+ <nlightnfotis> they were linker errors
+ <nlightnfotis>
+ https://gist.github.com/NlightNFotis/5896188#file-build_results
+ <nlightnfotis> I am trying to build gcc on a linux 32 bit environment. It
+ also has some issues but not linker errors
+ <nlightnfotis> will resolve them to see if the linker errors are
+ reproducible on linux
+ <braunr> oh, lex stuff
+ <braunr> should be easy enough
+
+
+# IRC, freenode, #hurd, 2013-07-05
+
+ <nlightnfotis> I have not made much progress, but I see myself working with
+ it.
+ <nlightnfotis> I have managed to build gcc go on Linux
+ <nlightnfotis> but Hurd seems to have some issues
+ <nlightnfotis> it seems to randomly crash
+ <teythoon> the build process?
+ <nlightnfotis> not quite randomly it seems to be though
+ <nlightnfotis> yeah
+ <nlightnfotis> I have noticed that there is a pattern
+ <nlightnfotis> it does crash after some time
+ <teythoon> ^^
+ <nlightnfotis> but it doesn't crash at specific files
+ <braunr> define crash
+ <nlightnfotis> at some times it may crash during compiling insn-emit.c
+ <braunr> (hello guys)
+ <teythoon> hi braunr :)
+ <nlightnfotis> braunr: hey there! It does seem to keep on compiling this
+ file for a very long time (I have let it do so for 10, 20, 30 minutes)
+ but the result is the same
+ <nlightnfotis> and it does so for different files for different build
+ options
+ <braunr> ok so it doesn't crash
+ <braunr> it just doesn't complete
+ <braunr> is the virtual machine eating 100% cpu during that time ?
+ <nlightnfotis> I can still type at the terminal, but I can't send a term
+ signal
+ <nlightnfotis> I can report that QEMU does hold 100% of one core at that
+ time, (like it keeps processing) but there is no output on the terminal
+ <braunr> ok
+ <nlightnfotis> of course I can type at the terminal
+ <nlightnfotis> but nothing happens
+ <braunr> any idea of the size of the files involved ?
+ <nlightnfotis> I am checking it out right now
+ <nlightnfotis> before this goes any further, let me report on my
+ investigation
+ <braunr> i expect that to be our classic writeback thread storm issue
+ <nlightnfotis> initially, I thought it might be that it ran out of memory
+ <nlightnfotis> even though I know that compilation is not memory intensive,
+ rather, cpu intensive
+ <nlightnfotis> anyway I increased the size of ram available to the vm
+ <nlightnfotis> from 1024 mb to 1536
+ <nlightnfotis> that didn't seem to have any effect. The "crash" still
+ happens at the same time, at the same files
+ <braunr> use freeze
+ <braunr> not crash
+ <braunr> crash is very misleading here
+ <nlightnfotis> freeze it is then.
+ <nlightnfotis> anyway
+ <nlightnfotis> then it struck me that it might be that the hard disk size
+ (3gb) might be too small (considering the gcc git repo is 1gb+)
+ <nlightnfotis> so I resized the qemu image to 8gb of hdd size
+ <nlightnfotis> the new size is acknowledged by the vm
+ <pinotree> for gcc in debug mode? might still not be enough
+ <nlightnfotis> but still it has no effect - it seems to follow its freezing
+ patterns
+ <pinotree> given your work, i'd have not less than 15-20
+ <braunr> i'd use 32
+ <braunr> but that's because i like power of twos
+ <nlightnfotis> pinotree: thanks for the advice. Right now I was gonna
+ increase the swap size
+ <nlightnfotis> according to vmstat in the hurd
+ <nlightnfotis> swap size is 173 mb
+ <nlightnfotis> don't know if it does have an impact
+ <braunr> it may but before rushing
+ <braunr> if you need swap, you're doomed anyway
+ <braunr> consider swap highly unreliable on the hurd
+ <braunr> please show the output of df -h on the file system you're using to
+ build
+ <braunr> ideally, i'd recommend using separate / and /home file systems
+ <braunr> it really improves reliability
+ <nlightnfotis> I don't think it swaps to be honest; however that's
+ something that my mentor thomas had suggested (increasing swap size) so I
+ am gonna try it at some time.
+ <pinotree> or have a separate file system in a subdir and work on it
+ <braunr> yes, /home or whatever suits you
+ <braunr> just not /
+ <nlightnfotis> braunr: pinotree: thanks both for your advice. Will do now,
+ and report on the results.
+ <braunr> that's not all
+ <braunr> 11:17 < braunr> please show the output of df -h on the file system
+ you're using to build
+ <nlightnfotis> braunr: I am on it. Oh and btw, every time I am forced to
+ close the vm (due to the freezes) when I restart it ext2 reports that the
+ file system was not cleanly unmounted and does some repair to some
+ files. I am trying to find an explanation for that, but I can think of
+ many things
+ <braunr> well obviously
+ <pinotree> ext2 has no journaling
+ <braunr> the file system was not cleanly unmounted since you restarted it
+ with a cold reset
+ <nlightnfotis> braunr: df -h comes out with this: "df: cannot read table of
+ mounted file systems"
+ <pinotree> also, even if you manage to always shut down correctly, when
+ fsck runs because of the maximum mount count it'd find errors anyway (so
+ we have some bug)
+ <braunr> nlightnfotis: df -h /path/to/build/dir
+ <braunr> pinotree: not really bugs but it could be cleaned up
+ <nlightnfotis> filesystem: - Size 2.8G Used 2.8G Avail 0 Use% 100% Mounted
+ on /
+ <nlightnfotis> wow
+ <braunr> nlightnfotis: see
+ <nlightnfotis> that seems to explain many things
+ <teythoon> ^^
+ <nlightnfotis> thanks for that braunr!
+ <braunr> you resized the disk, but not the partition and the file system
+ <pinotree> braunr: well, if something in ext2 (or its libs) leaves issues
+ in the fs, i'd call that a bug :>
+ <nlightnfotis> yeah, that was utterly stupid of me
+ <braunr> pinotree: they're not issues
+ <braunr> nlightnfotis: be careful, mach needs a reboot every time you
+ change a partition table
+ <teythoon> nlightnfotis: important thing is that you found the issue :)
+ <braunr> then only, you can use resize2fs
+ <teythoon> braunr: weird, I thought mach nowadays can reload the partition
+ tables?
+ <teythoon> braunr: doesn't d-i need that?
+ <braunr> maybe a recent change i forgot
+ <braunr> or maybe fdisk still reports the error although it's fine
+ <braunr> in doubt, rebooting is still safe :p
+ <teythoon> or maybe youpi hacked it into d-is gnumach
+ <braunr> i doubt it would be there for the installer only :)
+ <braunr> if it's there, it's there
+ <braunr> i just don't know it
+ <nlightnfotis> braunr: teythoon: and everyone else that helped me. Thank
+ you all guys. This was something that was driving me crazy. Will do all
+ that you suggested and report back on my status
+
+
+# IRC, freenode, #hurd, 2013-07-08
+
+ <nlightnfotis> tschwinge, I have managed to overcome most of the obstacles
+ I had initially faced with my project
+ <nlightnfotis> but I still had some build errors, that's why I have not
+ reported yet. Wanna try to see if I can resolve them today, and write my
+ report in the afternoon.
+ <tschwinge> nlightnfotis: So, from a quick look into the IRC backlog, it
+ was a "simple" out of disk space problem? %-) That happens.
+ <tschwinge> nlightnfotis: And yes, GCC needs a lot of disk space.
+ <tschwinge> nlightnfotis: What kind of build errors are you seeing now?
+ <nlightnfotis> tschwinge, yeah I felt stupid at the time, but it didn't
+ actually strike me that the file system didn't see the extra space. Also
+ it took me some time to figure out that in order to mount the new
+ partition, I only had to edit /etc/fstab
+ <nlightnfotis> always tried to mount it with the ext2 translator
+ <nlightnfotis> and the translator kept dying
+ <nlightnfotis> but it's all figured out now
+ <nlightnfotis> the latest build errors I am seeing are these
+ <teythoon> nlightnfotis: o_O you used fstab and it worked?
+ <nlightnfotis> yeah
+ <teythoon> nlightnfotis: that's unexpected from my perspective...
+ <nlightnfotis> I only had to add the new partition into fstab
+ <nlightnfotis> teythoon: I can pastebin my fstab if you wanna take a look
+ at it
+ <nlightnfotis> tschwinge: these were my latest build errors
+ https://www.dropbox.com/s/b0pssdnfa22ajbp/build_results
+ <teythoon> nlightnfotis: I'm pretty sure that mount -a isn't done on hurd
+ w/o pinos runsystem.sysv
+ <teythoon> weird
+ <nlightnfotis> tschwinge: I have also tried to build gcc with "make -w"
+ which from what I know suppresses the errors that stopped compilation
+ <nlightnfotis> but the weird thing is that gcc nearly took forever to build
+ <teythoon> nlightnfotis: could you do a showtrans /your/mountpoint?
+ <nlightnfotis> teythoon: /hurd/ext2fs /dev/hd0s3
+ <teythoon> nlightnfotis: ok, so you've set a passive translator and an
+ active is started on demand
+ <nlightnfotis> it must be a passive translator
+ <teythoon> nlightnfotis: this is the hurd way of doing things, fstab is
+ unrelated
+ <nlightnfotis> it seems to persist during reboots
+ <teythoon> yes, exactly
+ <nlightnfotis> teythoon: my fstab if you wanna take a look
+ http://pastebin.com/ef94JPhG
+ <nlightnfotis> after I added /dev/hd0s3 to fstab along with its mountpoint,
+ and restarted the hurd, only then did I manage to use that partition
+ <nlightnfotis> before doing so I tried pretty much anything involving
+ mounting the partition and setting the ext2fs translator for it, but it
+ kept dying
+ <nlightnfotis> of course it was an ext2 filesystem
+ <youpi> err, perhaps adding to fstab simply triggered an fsck at reboot?
+ <teythoon> nlightnfotis: might have been that you needed to reboot mach so
+ that it picks up the new partition table
+ <teythoon> youpi: I thought this was fixed, the partition reloading I mean?
+ <youpi> that is needed, yes
+ <youpi> let me check
+ <nlightnfotis> youpi: it could be, though, to be honest, my hurd system
+ does an fsck all the time at boot
+ <teythoon> how do you manage to do that w/o rebooting for d-i?
+ <youpi> (I don't remember whether device busy is detected)
+ <youpi> teythoon: by making all translators go away, iirc
+ <teythoon> nlightnfotis: btw, you have ~/gcc_new as mountpoint in your
+ fstab, pretty sure that this cannot work, the path has to be absolute and
+ no ~ expansion is done
+ <nlightnfotis> tbh it does work, and it's weird
+ <teythoon> nlightnfotis: it works b/c of the passive translator you set,
+ not b/c of the fstab entry
+ <nlightnfotis> teythoon: should I change it?
+ <teythoon> probably, yes
+ <tschwinge> Well, that is probably not used anywhere.
+ <teythoon> tschwinge: not yet but soon ;)
+ <tschwinge> Isn't /etc/fstab only consulted for fsck.
+ <youpi> atm yes
+ <tschwinge> Anyway, it is definitely a very good idea to have a partition
+ separate from the rootfs for doing actual work.
+ <tschwinge> I think I described that in one of the first GSoC coordination
+ emails. In the long one.
+ <nlightnfotis> teythoon: Oh it struck me now! Is it because tilde expansion
+ is only happening in bash, but /etc/fstab is read before bash is
+ initialized?
+ <tschwinge> nlightnfotis: Instead of fumbling around with partitioning of
+ disk images, it may be easier in your KVM/QEMU setup to simply add a new
+ disk using -hdb [file] (or similar).
+ <tschwinge> nlightnfotis: Basically, yes.
+ <youpi> nlightnfotis: fstab is not related with bash in any way
+ <nlightnfotis> anyway, it shouldn't matter now, it seems to be working, and
+ I wouldn't like fiddling around with it and messing it up now. I will
+ continue with resolving the gcc issues.
+ <tschwinge> But /etc/fstab has its very own "language" (layout), so tilde
+ expansion will never be done there.
+ <tschwinge> nlightnfotis: df -h ~/gcc_new/
+ <nlightnfotis> tschwinge: size 24G Used: 4.2G Avail 18G
+ <tschwinge> OK, that's fine.
+ <tschwinge> As you can see on
+ <http://darnassus.sceen.net/~hurd-web/open_issues/gcc/#index4h1>, GCC
+ will easily need some GiB.
+ <nlightnfotis> tschwinge: I have some questions about GCC: out of curiosity
+ how much time does it take to compile it on your machine? Because
+ yesterday I tried a -w (suppress warnings) build and it seemed to take
+ forever
+ <nlightnfotis> mind you the vm has 1536 MB of RAM available (I have read
+ somewhere that it can utilise such an amount) and the vm is KVM enabled
+ <youpi> without disabling g++, it can easily take hours
+ <tschwinge> nlightnfotis: The build error is unexpected, because I had
+ addressed that issue in a recent patch. :-)
+ <tschwinge> nlightnfotis: This is wrong: »checking whether setcontext
+ clobbers TLS variables... [...] yes«. Please check your sources, that
+ they correspond to the current version of the upstream
+ tschwinge/t/hurd/go branch.
+ <tschwinge> nlightnfotis: Quoting from that wiki page: »This takes up
+ around 3.5 GiB, and needs roughly 3.5 h on kepler.SCHWINGE and 15 h on
+ coulomb.SCHWINGE.« The latter is my Hurd machine.
+ <tschwinge> That's however with Java and Ada enabled, and a full
+ three-stages bootstrap.
+ <youpi> ah, right, there's java & ada too
+ <nlightnfotis> tschwinge: git branch (in the repo): master,
+ *tschwinge/t/hurd/go
+ <youpi> in debian they are built separately
+ <tschwinge> What I asked you to do is configure »--disable-bootstrap
+ --enable-languages=go«.
+ <tschwinge> So that should be a lot quicker.
+ <nlightnfotis> tschwinge: oh yes, every time I have tried to compile gcc I
+ have done so with these configurations
+ <tschwinge> But still a few hours perhaps.
+ <nlightnfotis> that's what I did yesterday too.
+ <tschwinge> OK, good. :-)
+ <tschwinge> A bootstrap build is a good way to check the just-built GCC for
+ sanity, but we expect that it is fine, as we concentrate on the GCC Go
+ port.
+ <nlightnfotis> the only "extra" configuration yesterday was my "-w" flag to
+ make, because those errors were actually triggered by -Werror
+ <tschwinge> Let me read up what make -w does. ;-)
+ <nlightnfotis> ah, yes, d/w I have read and understood what the bootstrap
+ build is. Seems like we don't need it atm
+ <nlightnfotis> afaik it suppresses all warnings
+ <pinotree> youpi: gcj no more
+ <nlightnfotis> the way gcc builds, it does convert (some) warnings to
+ errors
+ <tschwinge> Hmm. -w, --print-directory Print a message containing the
+ working directory before and after other processing.
+ <pinotree> youpi: doko folded gcj and gdc into gcc-4.8 to "workaround"
+ Built-Using
+ <tschwinge> nlightnfotis: Ah, that's configure --enable-werror or something
+ like that.
+ <youpi> pinotree: right
+ <nlightnfotis> yep, and -w suppresses it
+ <nlightnfotis> (from what I have understood)
+ <tschwinge> nlightnfotis: Are you thinking about make -k?
+ <tschwinge> Yeah, I guess.
+ <nlightnfotis> let me see what -k does
+ <pinotree> youpi: (just to make builds even more lightweight, eh</irony>)
+ <nlightnfotis> yeah, -k should do too, I shall try it
+ <tschwinge> But: if gcc -Werror fails, even with make -k, the build will
+ not be able to come to a successful end, because that one compilation
+ artefact that failed will be missing.
+ <nlightnfotis> so I shall try again with -w (supressed warnings)
+ <tschwinge> Configuring with --disable-werror (or similar) will "help" if
+ -Werror is the default, and the build fails due to that.
+ <nlightnfotis> from what I have understood these "errors" are not something
+ critical: it's only that function prototypes for these functions are
+ missing
+ <nlightnfotis> I have seen the code there, and even "default" gcc generated
+ prototypes (from the first usage of the function) should do, so I can't
+ understand why it might be a serious problem if I tell gcc to skip that
+ point
+ <tschwinge> nlightnfotis: Ah, now I see. You don't mean make -w, but
+ rather gcc -w: »-w Inhibit all warning messages.«
+ <tschwinge> But really, there shouldn't be such warnings/errors that make
+ the build fail.
+ <nlightnfotis> yeah
+ <tschwinge> nlightnfotis: In your GCC sources directory, what does this
+ tell: git rev-parse HEAD
+ <tschwinge> And, is the checkout clean: git status
+ <tschwinge> The latter will take some time.
+ <nlightnfotis> git status takes an awful amount of time
+ <nlightnfotis> last I checked
+ <nlightnfotis> but git rev-parse HEAD
+ <nlightnfotis> produces this result:
+ <nlightnfotis> 91840dfb3942a8d241cc4f0e573e5a9956011532
+ <tschwinge> OK, that's correct. So probably some of the checked out files
+ are not in a pristine state?
+ <nlightnfotis> I shall run a git clean and see. If that doesn't work too,
+ maybe I shall reclone the repository?
+ <nlightnfotis> there's nothing foreign to the repo that I have added, only
+ lib gmp, lib mpc and lib mpfr (and they are in their own folders inside
+ my gcc working directory)
+ <tschwinge> nlightnfotis: You shouldn't need to do the latter if you
+ instead run: apt-get build-dep gcc-4.8
+ <nlightnfotis> I remember having done that inside the Hurd, but it always
+ resulted in an error from what I can recall
+ <nlightnfotis> let me check this out
+ <nlightnfotis> yes
+ <tschwinge> nlightnfotis: Whenever you use Git on Hurd, pass the --quiet
+ flag, to avoid the rare but possible corruption issue described on
+ <http://darnassus.sceen.net/~hurd-web/open_issues/git_duplicated_content/>
+ and <http://darnassus.sceen.net/~hurd-web/open_issues/git-core-2/>.
+ <nlightnfotis> tschwinge: Forgive me for that. I will set up an alias
+ immediately.
+ <tschwinge> nlightnfotis: I don't know if an alias is possible, because --
+ I think -- you'll need to do things like: git fetch --quiet
+ <tschwinge> So pass --quiet to subcommands.
+ <nlightnfotis> oh. ok.
+ <tschwinge> nlightnfotis: What you can also do, is shut down your Hurd VM,
+ and mount the disk image on GNU/Linux (mount with offset to get the right
+ partition), and then run a diff -ru against a Git clone done on
+ GNU/Linux, and see whether there are any unexpected differences outside
+ of the .git/ directory.
+ <nlightnfotis> sounds like a plan. I will check this out today then :)
+ <nlightnfotis> tschwinge: if all else fails, then recloning the repo with
+ --quiet passed should work, right?
+ <tschwinge> Yes, that's probably the most straight-forward check to do.
+ <tschwinge> Heh, yes to both these questions. :-)
+ <tschwinge> nlightnfotis: Oh, you don't even have to re-clone, but rather
+ re-check-out the branch.
+ <nlightnfotis> I was thinking of recloning just to bring the whole
+ repository to a pristine state
+ <tschwinge> So something like (inside the source directory): rm -rf ./*
+ (remove any files, but leave .* in place, in particular the .git/
+ directory), followed by git checkout -f HEAD --quiet
+ <tschwinge> nlightnfotis: But before doing that, please do the diff first,
+ so that we know (hopefully) where the erroneous build results were coming
+ from.
+ <nlightnfotis> regarding the copyright assignment files, I sent them on
+ day 1 (that is the 20th of June). I have not heard anything about
+ those documents to date (sadly)
+ <nlightnfotis> what's worse is that although I have a reference number to
+ track those documents, their (Greek postal office) tracking service sucks
+ so badly, that one day it's offline, the next it suggests it can't find
+ the object in their database, the next it says it is still in the local
+ post office
+ <nlightnfotis> let me check it out now
+ <nlightnfotis> still nothing from their online service
+ <nlightnfotis> let me call them
+ <nlightnfotis> tschwinge: I called the post office regarding the copyright
+ papers. They told me that the same day (the 20th of June) it left from
+ Herakleion, Crete to Athens and the same day it must have left the
+ country heading towards the US. They also told me it takes about 1 week
+ for it to arrive.
+ <tschwinge> nlightnfotis: OK, so probably waiting at the FSF office to be
+ processed. Let's allow for some more time. After all, this is not
+ critical for your progress.
+
+
+# IRC, freenode, #hurd, 2013-07-10
+
+ <nlightnfotis> tschwinge: I have run the diff of the GCC repo on the Hurd
+ against the one on my host linux os, and there was nothing relevant to
+ fixcontext and initcontext that are the ones that fail the
+ compilation. In any case I did recheck out the branch, and I have
+ attempted a build with it. It fails at the same point. Now I am
+ attempting a build with the -w (inhibit warnings) flag enabled
+ <tschwinge> nlightnfotis: Have there been any differences in the diff?
+ There should be none at all.
+ <nlightnfotis> tschwinge: there were some small changes due to the repo's
+ being checked out at different times. It was a large diff however. I
+ inspected it and didn't find anythign that was of much use. Here it is in
+ case you might want to see it:
+ https://www.dropbox.com/s/ilgc3skmhst7lpv/diffs_in_git.txt
+ <tschwinge> nlightnfotis: Well, the idea of this exercise precisely was to
+ use the same Git revisions on both sides of the diff -- to show that
+ there are no spurious differences -- which can't be shown from your
+ 124486 lines diff. (Even though indeed there is no difference in
+ libgo/configure that would explain the mismatch, who knows what else
+ might be relevant for that.)
+ <tschwinge> Would you please repeat that?
+ <nlightnfotis> tschwinge: I will do so. It was wrong of me to not diff
+ against the same revisions, but going through the diff results grepping
+ for the problematic code didn't yield any results, so I thought that
+ might not be the issue.
+ <nlightnfotis> I will perform the diff again tomorrow morning and report on
+ the results.
+ <tschwinge> nlightnfotis: Anyway, if you checked out again, the latest
+ revision, and it still fails in exactly the same way, there is something
+ wrong.
+ <tschwinge> nlightnfotis: And -w won't help, as there is a hard error
+ involved.
+ <tschwinge> nlightnfotis: Are you still working on GSoC things today?
+ <nlightnfotis> tschwinge: yeah I am here. I decided to do the diff today
+ instead of tomorrow.
+ <nlightnfotis> It finished now btw
+ <nlightnfotis> let me tell you
+ <nlightnfotis> ah and this time, the gits were checked out at the same time
+ <nlightnfotis> from the same source
+ <nlightnfotis> and are at the same branch
+ <tschwinge> nlightnfotis: Could you upload the
+ gccbuild/i686-unknown-gnu0.3/libgo/config.log of the build that failed?
+ <nlightnfotis> tschwinge: sure. give me a minute
+ <nlightnfotis> tschwinge: there is something strange going on. The two
+ repos are at the exact same state (or at least should be, and the logs
+ indicate them to be) but still the diff output is 4.4 mb
+ <nlightnfotis> but no presence of initcontext or fixcontext
+ <nlightnfotis> tschwinge: the config.log file -->
+ http://pastebin.com/bSCW1JfF
+ <nlightnfotis> wow! I can see several errors in the config.log file
+ <nlightnfotis> but I am not so sure whether they are fatal. Configure returns 0
+ at the end of the log
+ <tschwinge> nlightnfotis: As the configure scripts probe for all kinds of
+ features on all kinds of strange systems, it's to be expected that some
+ of these fail on GNU/Hurd.
+ <tschwinge> What is not expected, however, is:
+ <tschwinge> configure:15046: checking whether setcontext clobbers TLS
+ variables
+ <tschwinge> [...]
+ <tschwinge> configure:15172: ./conftest
+ <tschwinge> /root/gcc_new/gcc/libgo/configure: line 1740: 1015 Aborted
+ ./conftest$ac_exeext
+ <tschwinge> Hmm. apt-cache policy libc0.3
+ <tschwinge> nlightnfotis: ^
+ <nlightnfotis> tschwinge: Installed 2.13-39+hurd.3
+ <nlightnfotis> Candidate: 2.1-6
+ <nlightnfotis> *2.17
+ <tschwinge> Bummer.
+ <tschwinge> nlightnfotis: As indicated in
+ <http://news.gmane.org/find-root.php?message_id=%3C87li6cvjnl.fsf%40kepler.schwinge.homeip.net%3E>
+ and thereabouts, you need 2.17-3+hurd.4 or later...
+ <tschwinge> Well.
+ <tschwinge> At least that now explains what is going on.
+ <nlightnfotis> tschwinge: i see. I am in the process of updating my hurd
+ vm. I saw that libc has also been updated to 2.17
+ <nlightnfotis> I will confirm when updating is done
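+
+The probe that failed here checks whether jumping through a saved context
+leaves thread-local storage intact.  A minimal C sketch of the idea follows;
+it is not the actual libgo configure test, only an illustration of what is
+being checked:
+
+    #include <stdio.h>
+    #include <ucontext.h>
+
+    static __thread int tls_value = 42;
+
+    int
+    main (void)
+    {
+      ucontext_t ctx;
+      volatile int phase = 0;
+
+      getcontext (&ctx);
+      if (phase == 0)
+        {
+          phase = 1;
+          tls_value = 7;        /* change the TLS variable ...            */
+          setcontext (&ctx);    /* ... then jump back through the context */
+        }
+      /* With a broken setcontext the thread pointer can be clobbered, and
+         this access no longer refers to the right TLS block.  */
+      printf ("tls_value = %d (%s)\n", tls_value,
+              tls_value == 7 ? "TLS intact" : "TLS clobbered");
+      return 0;
+    }
+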
+ <tschwinge> nlightnfotis: Anyway, is the diff between the two repositories
+ empty now or are there still differences?
+ <nlightnfotis> there are differences
+ <nlightnfotis> and they were checked out at the same time
+ <nlightnfotis> from the same source
+ <nlightnfotis> (the official git mirror)
+ <nlightnfotis> and they are both at the same branch
+ <nlightnfotis> and still diff output is 4.4 MB
+ <nlightnfotis> but quick grepping into it and there is no mention of
+ initcontext or fixcontext
+ <tschwinge> That's... unexpected.
+ <nlightnfotis> may be a mistake I am making
+ <nlightnfotis> but considering that diff ran for some time before
+ completing
+ <tschwinge> In both Git repositories, »git rev-parse HEAD« shows the same
+ thing?
+ <tschwinge> Could you please upload the diff again?
+ <nlightnfotis> tschwinge: confirmed. libc is now version 2.17-1
+ <nlightnfotis> tschwinge: http://pastebin.com/bSCW1JfF
+ <nlightnfotis> for the rev-parse give me a second
+ <tschwinge> nlightnfotis: Where is libc0.3 2.17-1 coming from? You need
+ 2.17-3+hurd.4 or later.
+ <nlightnfotis> it is 2.17-7+hurd.1
+ <tschwinge> OK, good.
+ <tschwinge> The URL you just gave is the config.log file, not the diff.
+ <nlightnfotis> oh my mistake
+ <nlightnfotis> wait a minute
+ <nlightnfotis> the two repos have different output to rev-parse
+ <tschwinge> Phew.
+ <tschwinge> That explains.
+ <tschwinge> So the Git branches are at different revisions.
+ <nlightnfotis> that confused me... when I run git pull -a the branches that
+ were changed were all updated to the same revision
+ <nlightnfotis> unless... there were some automatic merges in the *host* GCC
+ repo required during some pulls
+ <nlightnfotis> but that was some time ago
+ <nlightnfotis> would it have messed my local history that much?
+ <nlightnfotis> that's the only thing that may be different between the two
+ repos
+ <nlightnfotis> they checkout from the same source
+ <tschwinge> nlightnfotis: At which revisions are the two
+ repositories/branches?
+ <tschwinge> I have never used »git pull -a«. What does that do?
+ <nlightnfotis> tschwinge: from what I know it does an automatic git fetch
+ followed by git merge. The -a flag must signal to pull all branches (I
+ think it's possible to pull only one branch)
+ <tschwinge> That's the --all option. -a is something different (that I
+ don't understand off-hand).
+ <tschwinge> Well, --all means to pull all remotes.
+ <tschwinge> But you just want the GCC upstream, I guess.
+ <tschwinge> I always use git fetch and git merge manually.
+ <nlightnfotis> oh my god! You are write. -a is equivalent to --append
+ <nlightnfotis>
+ https://www.kernel.org/pub/software/scm/git/docs/git-pull.html
+ <nlightnfotis> git pull must be safe though
+ <nlightnfotis>
+ http://stackoverflow.com/questions/292357/whats-the-difference-between-git-pull-and-git-fetch
+ <nlightnfotis> without the -a
+ <nlightnfotis> *right
+ <nlightnfotis> why did I even write "right" as "write" above I don't
+ even...
+ <nlightnfotis> what did I write in the sentence above
+ <nlightnfotis> oh my god...
+ <nlightnfotis> tschwinge: they are indeed on different revisions: The host
+ repo's last commit was made by me apparently, to merge master into
+ tschwinge/t/hurd/go, whereas the last commit of the Hurd repo was by you
+ and it reverted commit 2eb51ea
+ <nlightnfotis> and that should also explain the large diff file
+ <nlightnfotis> with master merged into the tschwinge/t/hurd/go branch
+ <nlightnfotis> I will purge the debian repo and reclone it
+ <nlightnfotis> that should bring it to a safe state I suppose.
+
+
+# IRC, freenode, #hurd, 2013-07-11
+
+ <teythoon> nlightnfotis: how's your build going?
+ <nlightnfotis> I tried one earlier and it seemed to build without any
+ issues, something that was...strange. I am repeating the build now, but I
+ am saving the compilation output this time to study it.
+ <teythoon> it was strange that the build succeeded? that sounds sad :/
+ <nlightnfotis> teythoon: considering that for 3 weeks now I have failed to
+ build it without errors, it sure seems weird that it builds without errors now :)
+ <braunr> what did you change ?
+ <nlightnfotis> braunr: not many things apparently. To be honest the change
+ that seemed to do the trick was (under thomas' guidance) updating libc
+ from 2.13 to 2.17
+ <braunr> well that can explain
+ <nlightnfotis> tschwinge: Big update! GCC-go not compiles without errors
+ under the Hurd. I have done 2 compilations so far, none of which had
+ issues. Time needed for full build (without bootstrap) is 45 minutes +- 1
+ minute. I also run the test suite, and I can confirm your results
+ <pinotree> s/not/now/, perhaps?
+ <nlightnfotis> pinotree yeah. I don't know how it came up with not there. I
+ meant now
+ <nlightnfotis> tschwinge: link for the go.sum is here -->
+ https://www.dropbox.com/s/7qze9znhv96t1wj/go.sum
+
+
+# IRC, freenode, #hurd, 2013-07-12
+
+ <tschwinge> nlightnfotis: Great! So you finally reproduced my results.
+ :-)
+ <nlightnfotis> tschwinge: Yep! I am now building a blog, so that I can move
+ my reports there, so that they are more detailed, to allow for greater
+ transparency of my actions
+ <tschwinge> nlightnfotis: Did you recently (in email, I think?) indicate
+ that there is another Go testsuite, for libgo?
+ <tschwinge> nlightnfotis: As you prefer.
+ <nlightnfotis> tschwinge: there seemed to be one, at least in linux. I
+ think I saw one in the Hurd too.
+ <tschwinge> Oh indeed there is a libgo testsuite, too.
+ <nlightnfotis> as a matter of fact, make check-go
+ <nlightnfotis> did check for the lib
+ <nlightnfotis> but lib was failing
+ <nlightnfotis> yeah
+ <tschwinge> So please have a look at that testsuite's results, too, and
+ compare to the GNU/Linux ones.
+ <nlightnfotis> sure. I can do that now.
+ <tschwinge> And for the go.sum you posted, please have a look at the tests
+ that do not pass (»grep -v ^PASS: < go.sum«), assuming they do pass on
+ GNU/Linux.
+ <tschwinge> I suggest you add a list of the differences between GNU/Linux
+ and GNU/Hurd testresults to the wiki page,
+ <http://darnassus.sceen.net/~hurd-web/open_issues/gccgo/>, at the end of
+ the Part I section.
+ <nlightnfotis> I'm on it.
+ <tschwinge> For now, please ignore any failing tests that have »select« in
+ their name -- that is, do file them, but do not spend a lot of time
+ figuring out what might be wrong there.
+ <tschwinge> The Hurd's select implementation is a bit of a beast, and I
+ don't want you -- at this time -- to spend a lot of time on that. We
+ already know there are some deficiencies, so we should postpone that to
+ later.
+ <nlightnfotis> tschwinge: noted.
+ <tschwinge> So what I would like at the moment, is a list of the testresult
+ differences to GNU/Linux, then from the go.log file any useful
+ information about the failing test (which perhaps already explains
+ what's going wrong), and then an analysis of the failure.
+ <tschwinge> nlightnfotis: I assume you must be really happy that you
+ finally got it build fine, and reproduced my results. :-)
+ <nlightnfotis> tschwinge: yeah! I can not hide from you the fact that
+ failing all those builds made me really nervous about me missing my
+ schedule. Having finally built that and revisiting my application I can
+ see I am on schedule, but I have to intensify my work to compensate for
+ any potential unforeseen obstacles in the future
+
+
+# IRC, freenode, #hurd, 2013-07-15
+
+ <youpi> nlightnfotis: btw, do you have a weekly progress report?
+ <nlightnfotis> youpi: not yet. Will write it shortly and post it here. I
+ made a new blog to keep track of my progress.
+ <nlightnfotis> Will report much more frequently now via my blog
+ <youpi> did you add your blog url to the hurd wiki?
+ <nlightnfotis> currently I am running gcc tests on both gcc go and libgo to
+ see what the differences are with Linux
+ <nlightnfotis> I believe I have done so, let me see
+ <nlightnfotis> youpi: gccgo passes most of its tests (it fails a small
+ number, and I am looking into those tests) but libgo fails 130/131 tests
+ (on the Hurd that is)
+ <youpi> ok
+
+ <nlightnfotis> guys I wrote my report. This time I made it available on my
+ personal blog. You can find it here:
+ www.fotiskoutoulakis.com/blog/2013/07/15/gsoc-week-4-report/ As always,
+ open to (and encouraging) criticism, suggestions, anything that might
+ help me.
+ <nlightnfotis> I also have to mention that now that my personal website is
+ online, I will report much more frequently, to the scale of reporting day
+ by day, or every 2-3 days.
+ <youpi> nlightnfotis: without spending time on select, it'd be good to have
+ an idea of what is going wrong
+ <braunr> eh, go having trouble with select
+ <youpi> select is a beast, but we do have fixed things lately and we don't
+ currently know any issue still pending
+ <nlightnfotis> youpi: are you suggesting to not skip the select tests too?
+ <braunr> select is kind of critical ..
+ <braunr> as youpi said, if you can determine what's wrong, at the interface
+ level (not the implementation), it would be a good thing to do
+ <youpi> so we know what's wrong
+ <youpi> we're not asking to fix it, though
+ <nlightnfotis> braunr: youpi: noted. Thanks for the feedback. Is there
+ something else you might want me to improve? Something with the report
+ itself? Something you were expecting to see but I failed to provide?
+ <braunr> no it's ok
+ <braunr> it's short, readable, and readily answers the questions i might
+ have had so it's good
+ <braunr> as you say, now you have to work on the core of your task :)
+ <youpi> note: the "select" word in the testsuite is not strictly bound to
+ the C "select"
+ <youpi> so it is probably really worth digging a bit at least on the go
+ side
+ <braunr> but it's really worth doing in the end, as it will probably reveal
+ some nasty bugs on the way
+ <nlightnfotis> I appreciate your input. I will start working on it asap
+ (today) and will report on Wednesday perhaps (or Thursday at worst).
+
+
+# IRC, freenode, #hurd, 2013-07-18
+
+ <nlightnfotis> braunr: I found out what was causing the fails in the tests
+ <nlightnfotis> in both libgo and gccgo
+ <nlightnfotis> it's an assertion: mach_port_t ktid = __mach_thread_self ();
+ int ok = thread->kernel_thread == ktid; __mach_port_deallocate
+ ((__mach_task_self_ + 0), ktid); ok; })
+ <braunr> is all that the assertion ?
+ <nlightnfotis> yes
+ <braunr> please paste the code somewhere
+ <braunr> or is it in libpthread ?
+ <nlightnfotis> http://pastebin.com/G2w9d474
+ nonblock.x: ./pthread/pt-create.c:167: __pthread_create_internal: Assertion `({ mach_port_t ktid = __mach_thread_self (); int ok = thread->kernel_thread == ktid; __mach_port_deallocate ((__mach_task_self_ + 0), ktid); ok; })' failed.
+ 9 FAIL: go.test/test/chan/nonblock.go execution, -O2 -g
+ <braunr> yes
+ <braunr> that's related to my current work on thread destruction
+
+[[open_issues/libpthread/t/fix_have_kernel_resources]].
+
+ <braunr> thread resources recycling is buggy
+ <braunr> i suggest you make your own thread pool if you can
+ <nlightnfotis> I will look into it further and let you know. Thanks for
+ that.
+
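+The workaround suggested above is to keep a fixed set of worker threads alive
+instead of repeatedly creating and destroying pthreads, so that libpthread's
+buggy destruction/recycling paths are never taken.  A minimal, self-contained
+C sketch of such a pool (illustrative only, not the Go runtime's code):
+
+    #include <pthread.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    #define NWORKERS 4
+    #define NJOBS    16
+
+    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+    static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
+    static int next_job;
+
+    /* Workers never exit: when out of work they block on the condition
+       variable instead of being destroyed and later recycled.  */
+    static void *
+    worker (void *arg)
+    {
+      int id = *(int *) arg;
+      for (;;)
+        {
+          pthread_mutex_lock (&lock);
+          while (next_job >= NJOBS)
+            pthread_cond_wait (&cond, &lock);
+          int job = next_job++;
+          pthread_mutex_unlock (&lock);
+          printf ("worker %d runs job %d\n", id, job);
+        }
+      return NULL;
+    }
+
+    int
+    main (void)
+    {
+      pthread_t tids[NWORKERS];
+      int ids[NWORKERS];
+
+      for (int i = 0; i < NWORKERS; i++)
+        {
+          ids[i] = i;
+          pthread_create (&tids[i], NULL, worker, &ids[i]);
+        }
+      sleep (1);   /* let the pool drain the job queue */
+      /* Exiting the process tears everything down without ever running
+         the per-thread destruction code.  */
+      return 0;
+    }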
+
+# IRC, freenode, #hurd, 2013-07-22
+
+ <nlightnfotis> tschwinge, I have found what is failing both libgo and gccgo
+ tests, but for the life of me, I can not really find the offending code
+ on any repository.
+ <nlightnfotis> not even the eglibc-source debian package. it's driving me
+ insane.
+ <tschwinge> nlightnfotis: If this is driving you insane, we should quickly
+ have a look at that!
+ <nlightnfotis> thanks tschwinge: I have found that the offending code is an
+ assertion: { mach_port_t ktid = __mach_thread_self (); int ok =
+ thread->kernel_thread == ktid; __mach_port_deallocate ((__mach_task_self_
+ + 0), ktid); ok; } in a file called pt-create.c under
+ libpthread, on line 167
+ <nlightnfotis> but for the life of me, I can not find that piece of code
+ anywhere. And when I mean anywhere, I mean anywhere. I have looked for it
+ on all of the branches of glibc, libpthread and the source code of
+ eglibc.
+ <nlightnfotis> that's why if you don't mind I would like to write my report
+ in a day or two, when (hopefully) I will have more progress to report on.
+ <youpi> nlightnfotis: isn't that libpthread/sysdeps/mach/pt-thread-start.c
+ ?
+ <youpi> or rather, ./sysdeps/mach/hurd/pt-sysdep.h
+ <nlightnfotis> youpi: let me check this out. If that's it I'm gonna cry.
+ <youpi> which unfortunately is inlined in a lot of places
+ <youpi> nlightnfotis: does the assertion not tell you the file & line?
+ <nlightnfotis> youpi: holy smokes! That's the code I was looking for! Oh
+ boy. Yeah the logs do tell me, but it was very misleading. So misleading,
+ that I was actually looking at the wrong place. All logs suggest that
+ this piece of code is at libpthread/pthread/pt-create.c in line 167
+ <youpi> what is that line in your tree?
+ <youpi> a call to _pthread_self(), isn't it?
+ <youpi> then it's not actually misleading, this is indeed where the
+ pt-sysdep.h definition gets inlined
+ <nlightnfotis> it seems so, yeah. it's err = __pthread_sigstate
+ (_pthread_self (), 0, 0, &sigset, 0);
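+
+In other words, _pthread_self() comes from pt-sysdep.h as a block-expression
+macro, so the assert() inside it is expanded at each call site and reports the
+caller's file and line.  A purely illustrative C example of the effect, with
+made-up names:
+
+    #include <assert.h>
+
+    /* Imagine this living in a header, as _pthread_self() does.  */
+    #define CHECK_SELF(p) assert ((p) != 0)
+
+    static int lookup_self (void) { return 0; }
+
+    int
+    main (void)
+    {
+      int self = lookup_self ();
+      /* The assertion text and __FILE__/__LINE__ come from this expansion
+         point, not from the header that defines the macro.  */
+      CHECK_SELF (self);
+      return 0;
+    }
+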
+ <youpi> nlightnfotis: and what is the backtrace?
+ <nlightnfotis> youpi: _pthread_create_internal: Assertion failed.
+ <nlightnfotis> The assertion is the one above
+ <youpi> nlightnfotis: sure, but what is the backtrace?
+ <nlightnfotis> I don't have the full backtrace. These are the logs from the
+ compiler. All I can get is reports like this: nonblock.x:
+ ./pthread/pt-create.c:167: __pthread_create_internal: Assertion `({
+ mach_port_t ktid = __mach_thread_self (); int ok = thread->kernel_thread
+ == ktid; __mach_port_deallocate ((__mach_task_self_ + 0), ktid);
+ ok; })' failed.
+ <youpi> nlightnfotis: you should probably have a look at running the tests
+ by hand
+ <youpi> so you can run them in a debugger, and get backtraces etc.
+ <braunr> nlightnfotis: did i answer that ?
+ <nlightnfotis> braunr: which one?
+ <braunr> the problems you're seeing are the pthread resources leaks i've
+ been trying to fix lately
+ <braunr> they're not only leaks
+ <braunr> creation and destruction are buggy
+ <nlightnfotis> I have read so in
+ http://www.gnu.org/software/hurd/libpthread.html. I believe it's under
+ Thread's Death right?
+ <braunr> nlightnfotis: yes but it's buggy
+ <braunr> and the description doesn't describe the bugs
+ <nlightnfotis> so we will either have to find a temporary workaround, or
+ better yet work on a fix, right?
+ <braunr> nlightnfotis: i also told you the work around
+ <braunr> nlightnfotis: create a thread pool
+ <nlightnfotis> braunr: since thread creation is also buggy, wouldn't the
+ thread pool be buggy too?
+ <braunr> nlightnfotis: creation *and* destruction is buggy
+ <braunr> nlightnfotis: i.e. recycling is buggy
+ <braunr> nlightnfotis: the hurd servers aren't affected much because the
+ worker threads are actually never destroyed on debian (because of a
+ debian specific patch)
+
+ <teythoon> youpi, nlightnfotis, hacklu_: btw, what about the copyright
+ assignment process
+ <tschwinge> nlightnfotis just got his on file, so there is progress.
+ <tschwinge> I have email from Donald R Robertson III
+ <copyright-clerk@fsf.org> about that -- but it is not yet present in the
+ FSF copyright.list file...
+ <tschwinge> I think I received that email because I was CCed on
+ nlightnfotis' submission.
+ <nlightnfotis> tschwinge: I have got the papers, and they were signed by
+ the FSF. They stated delivery date 11 of July, but the documents were
+ signed on the 10th of July :P
+ <tschwinge> Ah, no, I received it via hurd-maintainers@gnu.org -- and the
+ strange thing is that not all assignments that got processed got sent
+ there...
+ <tschwinge> At the recent GNU Tools Cauldron we also discussed this in the
+ GCC context; and their experience was the very same. Emails get lost,
+ and/or take ages to be processed, etc.
+ <tschwinge> It seems the FSF is undermanned.
+
+
+# IRC, freenode, #hurd, 2013-07-27
+
+ <nlightnfotis> I have one question about the Mach sources: I can see it
+ initializes its own scheduler and also does the same for the linux
+ scheduler. Which one does it use?
+ <youpi> it doesn't use the linux scheduler
+ <youpi> the linux glue just glues linux scheduling concepts onto the mach
+ scheduler
+ <nlightnfotis> ohh I see now. Thanks for that youpi.
+
+
+# IRC, freenode, #hurd, 2013-07-28
+
+ <nlightnfotis> In the mach kernel source code, does the (void) before a
+ function call have a semantic meaning, or is it just remnants of the past
+ (or even documentation)
+ <pinotree> for example?
+ <nlightnfotis> pinotree: (void) thread_create (kernel_task,
+ &startup_thread);
+ <nlightnfotis> I read on stack overflow that there is only one case where
+ it has a semantic meaning, most of the times it doesn't
+ <nlightnfotis>
+ http://stackoverflow.com/questions/13954517/use-of-void-before-a-function-call
+ <pinotree> most probably thread_create has a non-void return value, and
+ this way you're explicitly suppressing its return value (usually because
+ you don't want/need to care about it)
+ <nlightnfotis> isn't the value discarded if the (void) is not there?
+ <pinotree> yes, but depending on extra attributes and/or compiler warning
+ flags the compiler might warn that the return value is not used while it
+ ought to
+ <pinotree> the cast to void should suppress that
+ <nlightnfotis> oh, okay, thanks for that pinotree
+ <nlightnfotis> and yes you are right that thread_create actually does
+ return something
+ <pinotree> even if there would be no compiler message about that, adding
+ the explicit cast could mean "yes, i know the function does return
+ something, but i don't care about it"
+ <pinotree> ... as hint to other code readers
+ <nlightnfotis> as a form of documentation then
+ <pinotree> also
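+
+To illustrate the point: the cast to void in C has no effect on what the
+program does; it just states that discarding the return value is deliberate
+(and, depending on compiler flags and attributes, may quiet unused-result
+warnings).  A small example, not taken from the Mach sources:
+
+    #include <stdio.h>
+
+    static int
+    create_thing (void)
+    {
+      puts ("thing created");
+      return 0;                  /* a status the caller may ignore */
+    }
+
+    int
+    main (void)
+    {
+      create_thing ();           /* return value silently discarded */
+      (void) create_thing ();    /* same effect, but the cast documents
+                                    that ignoring the value is intentional */
+      return 0;
+    }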
+
+ <nlightnfotis> oh well, I am gonna ask and I hope someone will answer it:
+ In the Mach's dmesg (/var/log/dmesg) I can see that the version string
+ along with initial memory mapping information are printed twice, when in
+ fact they are supposed to be called only once. Is this a bug, or some
+ buffering error, or are they actually called twice for some reason?
+
+
+# IRC, freenode, #hurd, 2013-07-29
+
+ <nlightnfotis> guys is the evaluation today?
+ <hacklu_> yes
+ <teythoon> right
+ <nlightnfotis> where can we find the evaluation papers on melange?
+ <hacklu_> wait until 12pm UTC.
+ <nlightnfotis> yeah, I just noticed thanks hacklu_
+ <hacklu_> nlightnfotis:)
+
+ <NlightNFotis> tschwinge: I only have one question regarding my project. If
+ I make some changes to libpthread, what's the best way to test them in
+ the hurd? Rebuild glibc with the updated libpthread?
+ <tschwinge> NlightNFotis: Yes, you'll have to rebuild glibc. I have a
+ cheat sheet for that:
+ http://darnassus.sceen.net/~hurd-web/open_issues/glibc/debian/
+ <tschwinge> It may be that the »Run debian/rules patch to apply patches«
+ step is no longer necessary with the 2.17 glibc packages.
+ <NlightNFotis> thanks for that tschwinge. :)
+ <tschwinge> NlightNFotis: Sure. :-)
+
+ <tschwinge> NlightNFotis: Where's your weekly status?
+ <NlightNFotis> I will write it today at noon. I have written all the
+ other ones, and they are available at www.fotiskoutoulakis.com
+ <NlightNFotis> the next one will be available there as well, later in the
+ day
+ <tschwinge> Ack. But please try to finish your report before the meeting,
+ as discussed.
+ <NlightNFotis> oh, forgive me for that. I thought it was ok to write my
+ report a day or so later. Sorry.
+ <tschwinge> NlightNFotis: Please write your report as soon as possible --
+ otherwise there's no useful way for me to know what your status is.
+ <NlightNFotis> I will. This week I have been mostly going through the
+ various sources (the Hurd, Mach and libpthread, especially the last two)
+ in my attempt to get a better understanding of how libpthread
+ works. Since yesterday I have attempted some small changes on my
+ libpthread repo that I plan on testing and reporting on them. That's why
+ I still have not written my report.
+ <tschwinge> NlightNFotis: Things don't need to be finished before you
+ report about them. It's often more useful to discuss issues *before* you
+ spend time on implementing them.
+ #hurd
+ <braunr> NlightNFotis: what kind of changes do you want to add to
+ libpthread ?
+ <tschwinge> Have a look at the assertion failure, I would hope. :-)
+ <braunr> well no
+ <braunr> again, i did that
+ <braunr> and it's not easy to fix
+ <NlightNFotis> braunr: I was looking into ways that I could create the
+ thread pool you suggested into libpthread
+ <braunr> no, don't
+ <braunr> create it in your application
+ <braunr> not in libpthread
+ <braunr> well, this may not be an acceptable solution either ..
+ <tschwinge> Before doing that we have to understand what exactly the Go
+ runtime is doing. It may just be a weird interaction with the setcontext
+ et al. functions that I failed to think about when implementing these?
+ <NlightNFotis> the other possibility is the go runtime libraries. But I
+ thought that libpthread might be a better idea, since you told me that
+ creation *and* destruction are buggy
+ <hacklu> braunr: you are right, the signal thread always exists. I had a
+ wrong understanding before.
+ <NlightNFotis> tschwinge: I can look into that, now. I will also include
+ that in my report.
+ <braunr> NlightNFotis: i don't see how this is a relevant argument ..
+ <braunr> tschwinge: i'd suggest he first try with a custom pool in the go
+ runtime, so we exclude what you're suspecting
+ <braunr> if this pool actually works around the issues NlightNFotis is
+ having, it will confirm the offending problem comes from libpthread
+ <tschwinge> So, as a very first step make any thread
+ destruction/deallocation a no-op.
+ <braunr> yes
+ <NlightNFotis> braunr: I originally understood that a thread pool might
+ skip thread destruction, so that we escape the buggy destruction
+ path. Since that is a problem in libpthread, it surely affects other
+ threads (not just go's) too. So I assumed that building the thread pool
+ into libpthread might help eliminate bugs that may affect other code too.
+ <braunr> no, it's not a proper fix
+ <braunr> it's a work around
+ <braunr> and i'm working on a proper fix in parallel
+ <braunr> (when i have the time, that is :/)
+ <NlightNFotis> oh, I see. So for the time, I had better not touch
+ libpthread, and take a look at the go run time aye?
+ <tschwinge> NlightNFotis: Remember: one thing after the other. First
+ identify what is wrong exactly. Then think and discuss how to solve the
+ very specific issue. Then implement it.
+ <braunr> as tschwinge said, make thread destruction a nop in go
+ <braunr> see if that helps
+ <tschwinge> NlightNFotis: For example, you surely have noticed (per your
+ last report), that basically all Go language tests pass (aside from the
+ handful of those testing select, etc.) -- but all those of the libgo
+ runtime library fail, literally all of them.
+ <tschwinge> You noticed they basically all fail with the same assertion
+ failure. But why do all the Go language ones work fine?
+ <tschwinge> Don't they execute the program they built, for example?
+ <tschwinge> (I haven't looked.)
+ <NlightNFotis> they do execute the program. the language ones that fail
+ too, fail due to the assertion failure
+ <tschwinge> Or, what else is different for them? How are they built, which
+ flags, how are they invoked.
+ <braunr> how many goroutines ?
+ <braunr> :p
+ <tschwinge> Do you also get the assertion failure when you build a small Go
+ program yourself and run that one?
+ <tschwinge> Don't get the assertion failure? Then add some more complex
+ stuff that is likely to involve adding/re-using new threads, such as
+ goroutines.
+ <NlightNFotis> I didn't get the assertion failure on a small test program,
+ but now that you suggest it it might be a good idea to build a custom
+ test suite
+ <tschwinge> Etc. That way you'll eventually get an understanding what
+ triggers the assertion failure.
+ <tschwinge> And that exactly is the kind of analysis I'd like to read in
+ your weekly report.
+ <tschwinge> A list of things you have done, which assumptions you've
+ made, how that directed your further analysis, what results that gave,
+ etc.
+ <NlightNFotis> I will do it. I will try to rush to finish it today before
+ you leave, so that you can inspect it. God I feel like all that time I
+ spent this week studying the particular source code (libpthread, and the
+ Mach) was in vain...
+ <NlightNFotis> on second thoughts, it was not in vain. I got a pretty good
+ understanding of how these pieces of software work, but now I will have
+ to do something completely different.
+ <tschwinge> Studying code is never in vain.
+ <tschwinge> Exactly.
+ <tschwinge> You must have had some motivation to study the code, so that
+ was surely a valid thing to do.
+ <tschwinge> But we'd link to understand your reasoning, so that we can
+ support you and direct you accordingly.
+ <braunr> but it's better to focus on your goals and determine an
+ appropriate course of actions, usually starting with good analysis
+ <tschwinge> Yes.
+ <pinotree> s/link/like/?
+ <tschwinge> pinotree: Indeed, thanks.
+ <braunr> makes me remember when i implemented radix trees to replace splay
+ trees, only to realize splay trees were barely used ..
+ <tschwinge> braunr: Yes. It has happened to all of us. ;-P
+ <tschwinge> NlightNFotis: So, don't worry -- but learn from such things.
+ :-)
+ <NlightNFotis> anyway, I will start right away with the courses of action
+ you suggested, and will try to have finished them by noon. Thanks for
+ your help, it really means a lot.
+ <tschwinge> In software generally, it is never a good idea to let yourself
+ be distracted and not follow your focus goal, because there are always so
+ many different things that could be improved/learned/fixed/etc.
+ <NlightNFotis> tschwinge, I am only nervous about one thing: the fact that
+ I have not submitted yet any patch or some piece of code in general. Then
+ again, the summer of code for me so far has been 70-80% reading about
+ stuff I didn't know about and 30-20% doing the stuff I should know
+ about...
+ <tschwinge> NlightNFotis: That's why we're here, to teach you something.
+ Which we're happy to do, but we all need to cooperate for that (and I'm
+ well aware that this is difficult if one is not in the same rooms, and
+ I'm also aware that my time is pretty limited).
+ <tschwinge> NlightNFotis: We're also very aware that the Hurd system, as
+ any operating system project (if you're not just doing "superficial"
+ things) is difficult, and takes lots of time to learn and to have concepts
+ and things sink into your brain.
+ <braunr> i wouldn't worry too much
+ <tschwinge> We're also still learning every day.
+ <braunr> go doesn't require a lot from the underlying system, but what is
+ required is critical
+ <braunr> once you identify it, coding will be quick
+ <NlightNFotis> tschwinge: braunr: thanks. I shall begin working following
+ the directions you gave to me.
+ <tschwinge> NlightNFotis: So yes, because Google wants us to grade you
+ based on that, you'll eventually have to write some code, but for
+ example, a patch to disable thread destruction/deallocation in libgo
+ would definitely count as such code. And that seems like one of your
+ next steps.
+ <NlightNFotis> tschwinge: i need to deliver that instantly, right? seeing
+ as the evaluation is today.
+ <tschwinge> NlightNFotis: No. Deliver it when you have something to
+ deliver. :-)
+ <NlightNFotis> tschwinge: I am nervous about the evaluation today. I have
+ not submitted a single piece of code, only some reports. How negatively
+ does this influence my performance report?
+ <tschwinge> NlightNFotis: If I can say so, in the evaluation today, Google
+ basically asks us mentors whether we want to fail our students right now.
+ Which I don't plan to do, knowing about the complexity of the Hurd
+ system, and the learning required before you can do useful code changes.
+ <NlightNFotis> tschwinge: that really means a lot to me, and it got a
+ weight off my chest.
+ <braunr> uh ok, i have to be the rude guy again
+ <braunr> NlightNFotis: the gsoc is also a way for the student to prepare
+ for working in software development communities
+ <braunr> whether free software/open source and/or in companies
+ <braunr> people involved care a lot less about pathos than actual results
+ <pinotree> (or to prepare students to be hired by google, but that's
+ another story)
+ <braunr> NlightNFotis: in other words, stop apologizing that much, stop
+ focusing so much on that, and just work as you can
+
+
+# IRC, freenode, #hurd, 2013-07-31
+
+ <nlightnfotis> teythoon: both samuel and thomas would be missing for the
+ week right?
+ <teythoon> nlightnfotis: they do, why?
+ <teythoon> nlightnfotis: err, they do?? why?
+
+
+# IRC, freenode, #hurd, 2013-08-01
+
+ <nlightnfotis> braunr: I checked out what you (and Thomas) suggested and
+ did some research on go on the Hurd. I have found out that go works,
+ until you need to use anything that has to do with a goroutine. I am now
+ playing with the go runtime and checking to see if turning thread
+ destruction into a no-op will make any difference.
+
+
+# IRC, freenode, #hurd, 2013-08-05
+
+ <nlightnfotis> youpi: whenever you have time, I would like to report my
+ progress as well.
+ <youpi> nlightnfotis: sure, go ahead
+ <youpi> but again, you should report before the meeting
+ <youpi> so we can read it before coming to the discussion
+ <nlightnfotis> I have written my report
+ <youpi> ah
+ <hacklu> nlightnfotis: I have read your report, these days you have made
+ great progress.
+ <youpi> where is it?
+ <nlightnfotis> it was available since yesterday
+ <nlightnfotis>
+ http://www.fotiskoutoulakis.com/blog/2013/08/05/gsoc-partial-week-7-report/
+ <nlightnfotis> thanks hacklu. The particular piece of code I was studying
+ was very very interesting :)
+ <hacklu> nlightnfotis: I think you should post your link in here or by email
+ next time. I had to spend a bit more time to find that :)
+ <nlightnfotis> youpi: for a tl;dr, last time I was told to check
+ gccgo's runtime for clues regarding the goroutine failures.
+ <nlightnfotis> hacklu: will keep that in mind, thanks.
+ <nlightnfotis> youpi: thing is, gccgo operates on two different thread
+ types: G's (the goroutines, lightweight threads that are managed by the
+ runtime) and M's (the "real" kernel threads)
+ <nlightnfotis> none of which are really "destroyed"
+ <youpi> ok, makes sense
+ <nlightnfotis> G's are put in a pool of available goroutines when their
+ status is changed to "Gdead" so that they can be reused
+ <nlightnfotis> M's also don't seem to go away. There is always at least one
+ M (the bootstrap one) and all other M's that get created are also stashed
+ in a pool of available working threads.
+ <youpi> you could put some debugging printfs in libpthread, to make sure
+ whether threads do die or not
+ <nlightnfotis> I am studying this further as we speak, but they both don't
+ seem to get "destroyed", so we can't be sure that the bugs are triggered
+ by thread destruction
+ <nlightnfotis> I was beginning to believe that maybe I was looking in the
+ wrong direction
+ <nlightnfotis> but then I looked at my past findings, and I noticed
+ something else
+ <nlightnfotis> if you take a look at the first failed goroutine, it failed
+ at the time.Sleep function, which puts a goroutine to sleep for ns
+ nanoseconds. That made me wonder whether it was something that had to do with
+ the context functions and not the goroutines' creation.
+ <youpi> nlightnfotis: that's possible
+ <youpi> nlightnfotis: I'd say you can focus on this very simple example: a
+ mere sleep
+ <youpi> that's one of the simplest things a thread scheduler has to do, but
+ it has to do it right
+ <youpi> fixing that should fix a lot of other issues
+ <nlightnfotis> if I have understood correctly, there is at least one G
+ (Goroutine) and at least one M (kernel thread) running. Sleep does put
+ that goroutine on hold, and restarting it might be an issue
+ <braunr> talking about thread scheduling ? :)
+ <youpi> nlightnfotis: go's runtime doesn't actually destroy kernel threads,
+ apparently
+ <nlightnfotis> youpi: yeah, that's what I have understood so far. And it
+ doesn't destroy goroutines either. If there was an issue with thread
+ creation, then I guess it should be triggered in the beginning of the
+ program too (seeing as both M's and G's are created there)
+ <nlightnfotis> the fact that it is triggered when a goroutine goes to sleep
+ makes me suspect the context functions
+ <youpi> yes
+ <nlightnfotis> again I am studying it the last days, in search of
+ clues. Will keep you all updated.
+ <nlightnfotis> braunr: I have written my report and it is available here
+ http://www.fotiskoutoulakis.com/blog/2013/08/05/gsoc-partial-week-7-report/
+ If you could read it and tell me if you notice anything weird, that would
+ be great.
+ <braunr> nlightnfotis: ok
+ <braunr> nlightnfotis: quite busy here so don't worry if i suddenly
+ disappear
+ <braunr> nlightnfotis: hum, does go implement its own threads ??
+ <nlightnfotis> braunr: yeah. It has 2 thread types: runtime managed (the
+ goroutines) and "real" (kernel managed) ones.
+ <braunr> i mean, does it still use libpthread ?
+ <nlightnfotis> thing is none of them "disappear" so as to explain the bug
+ with "thread creation **and** destruction)
+ <nlightnfotis> it must use libpthread for kernel threads as far as creation
+ goes.
+ <braunr> ok, good
+ <braunr> then, it schedules its own threads inside one pthread, right ?
+ <braunr> using the pthread as a virtual cpu
+ <nlightnfotis> yes. It maps the runtime threads onto kernel threads, and it
+ is the kernel threads that actually run
+ <nlightnfotis> the scheduler decides which goroutine will run on each
+ kernel thread.
+ <braunr> ew
+ <braunr> this is pretty much non portable
+ <braunr> and you're right to suspect context switching functions
+ <nlightnfotis> yeah my thought for it was the following: thread creation,
+ if it was buggy, should be triggered as soon as a program starts, seeing
+ as at least one kernel thread and at least one go routine starts. My
+ sleep experiment crashes when the goroutine is put on hold
+ <braunr> did you find the code putting on hold ?
+ <nlightnfotis> I will give you the exact link, wait a moment
+ <nlightnfotis> braunr:
+ https://github.com/NlightNFotis/gcc/blob/master/libgo/runtime/time.goc?source=c#L59
+ <nlightnfotis> actually the exact location is line 26, which calls the one I
+ pointed you at
+ <braunr> ahah, tsleep
+ <braunr> old ghost from the past
+ <braunr> nlightnfotis: the real location is probably runtime_park
+ <nlightnfotis> I will check this out.
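+
+For reference, a minimal sketch of the kind of single-goroutine sleep test
+being discussed here: a hypothetical reproducer that exercises the
+time.Sleep / runtime_park path mentioned above, not a program taken verbatim
+from the log.
+
+    package main
+
+    import (
+        "fmt"
+        "time"
+    )
+
+    func main() {
+        go func() {
+            // Putting a goroutine to sleep goes through the runtime's
+            // tsleep/runtime_park path discussed above.
+            time.Sleep(100 * time.Millisecond)
+            fmt.Println("goroutine woke up")
+        }()
+        // Keep main alive long enough for the goroutine to be scheduled.
+        time.Sleep(time.Second)
+    }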
+
+ <nlightnfotis> may I ask something non-technical but relevant to summer of
+ code?
+ <braunr> sure
+ <nlightnfotis> would it be okay if I took the day off tomorrow?
+ <braunr> nlightnfotis: ask tschwinge but i guess it's ok
+
+ <braunr> have you found runtime_park ?
+ <braunr> i'm downloading your repository from github but it's slow :/
+ <nlightnfotis> braunr: not yet. Grepping through the files didn't produce
+ any meaningful results and github's search is not working
+ <nlightnfotis> braunr: there is that strange thing with the gccgo sources,
+ where I can find a function's declaration but not its definition. Funny
+ thing is those functions are not really extern, so I am playing a hide
+ and seek game, in which I am not always successful.
+ <nlightnfotis> runtime_park is declared in runtime.h. I have looked nearly
+ everywhere for it. There is only one last place I have not looked at.
+ <nlightnfotis> braunr: I found runtime_park. It's here:
+ https://github.com/NlightNFotis/gcc/blob/master/libgo/runtime/proc.c?source=c#L1372
+
+ <tschwinge> nlightnfotis: Taking the day off is fine. Have fun!
+ <nlightnfotis> tschwinge: I am still here; Thanks for that tschwinge. I
+ will be for the next half hour or something if you would like to ask me
+ anything
+ <tschwinge> nlightnfotis: I have no immediate questions (first have to read
+ your report and discussion in here) -- so feel free to log out and enjoy
+ the sun outside. :-)
+
+ <teythoon> nlightnfotis, tschwinge: btw, have you seen
+ http://morsmachine.dk/go-scheduler ?
+ <nlightnfotis> teythoon: thanks for the link. It's really interesting.
+
+
+# IRC, freenode, #hurd, 2013-08-12
+
+ <nlightnfotis> teythoon did you manage to build the Hurd successfully?
+ <teythoon> ah yes, the Hurd is relatively easy
+ <teythoon> the libc is hard
+ <nlightnfotis> debian glibc or hurd upstream libc?
+ <teythoon> but my build on darnassus was successful
+ <nlightnfotis> *debian eglibc
+ <teythoon> well, I rebuilt the debian package with two tweaks
+ <nlightnfotis> do you build on linux and rsync on hurd or ...?
+ <teythoon> I built it on Hurd, though I thought about setting up a cross
+ compiler
+ <nlightnfotis> I see. The process was: build Mach, build Hurd, and then
+ build glibc and it's ready, or did it need more?
+ <teythoon> no, I never built Mach
+ <teythoon> I must admit I'm not sure about the "proper" procedure
+ <teythoon> if I change one of the Hurd's RPC definitions, I think the proper way
+ is to rebuild the libc against the new definitions and then the Hurd
+ <teythoon> but I found no way to do that, so everyone seems to build the
+ Hurd, install it, build the libc and then rebuild the Hurd again
+ <nlightnfotis> I see. Thanks for that :)
+
+ <nlightnfotis> tschwinge, I have also written my report! It's available
+ here
+ http://www.fotiskoutoulakis.com/blog/2013/08/12/gsoc-week-8-partial-report/
+ <nlightnfotis> I can sum it up if you want me to.
+ <tschwinge> nlightnfotis: I already read it! :-D
+ <tschwinge> Oh, I didn't. I read the week 7 one. Let me read week 8. ;-)
+ <nlightnfotis> ok. I am currently going through the assembly generated for
+ the sample program I have embedded in my report.
+ <nlightnfotis> the weird thing is that the assembly generated is pretty
+ much the same for the program with 1 and 2 goroutine functions (with the
+ obvious difference that the one with 2 goroutine functions has 1 more
+ goroutine in its assembly code)
+ <nlightnfotis> I can not understand why it is that when I have 1 goroutine,
+ an exception is triggered, but when I have two (which are 99%
+ identical) it seems to execute.
+ <nlightnfotis> and I do not understand why the exception is triggered when
+ I manually use a goroutine.
+ <nlightnfotis> To my understanding so far, there is at least 1 (kernel)
+ thread created at program startup to run main. The same thread gets
+ created to run a new goroutine (goroutines get associated with kernel
+ threads)
+ <nlightnfotis> and it's obvious from the assembly generated.
+ <nlightnfotis> go_init_main (the main function for go programs) starts with
+ a .cfi_startproc
+ <nlightnfotis> the same piece of code (.cfi_startproc) starts a new kernel
+ thread (on which a goroutine runs)
+ <tschwinge> nlightnfotis: Re your two-goroutines example: in that case I
+ assume, you're directly returning from the main function and the program
+ terminates normally. ;-)
+ <tschwinge> nlightnfotis: Studying the assembly code for this will be too
+ verbose, too low-level. What we need is a trace of steps that happen
+ until the error.
+ <nlightnfotis> tschwinge, that must be it, but it should trigger the bug,
+ since it still has at least one goroutine (and one is known to trigger
+ the bug)
+ <tschwinge> nlightnfotis: I guess the program exits before the first
+ goroutine would be scheduled for execution.
+ <nlightnfotis> the assembly for the goroutines is identical. You can't tell
+ one from the other. The only change is that it has 2 of these sections
+ instead of one
+ <nlightnfotis> actually it's the same for the first one
+ <tschwinge> nlightnfotis: I very much assume that the issue is not due to
+ the code generated by the Go compiler (which you're seeing in the
+ assembly code), but rather due to the runtime code in the libgo library.
+ <nlightnfotis> I didn't think of it this way.
+ <tschwinge> ... that improperly interacts with our libpthread.
+ <nlightnfotis> so my research should focus on the runtime from now on?
+ <tschwinge> Improperly may well imply that our libpthread is at fault, of
+ course, as we discussed.
+ <tschwinge> Back to the one-gouroutine case (that shows the assertion
+ failure). Simple case: one goroutine, plus the "main" thread.
+ <tschwinge> We need to get an understanding of the steps that happen until
+ the error happens.
+ <tschwinge> As this is a parallel problem, and it is involving "advanced"
+ things (such as setcontext), I would not trust GDB too much when used on
+ this code.
+ <nlightnfotis> I will have to manually step through the source myself,
+ right?
+ <tschwinge> What I would do, is add printf's (or similar) into the code at
+ critical points, to get an understanding of what's going on.
+ <tschwinge> Such critical points are: pthread_create, setcontext,
+ swapcontext.
+ <nlightnfotis> It sounds like a good idea. Anything else to note?
+ <tschwinge> That way, you can isolate the steps required to trigger the
+ assertion failure.
+ <tschwinge> For example, it could be something like: makecontext,
+ swapcontext, pthread_create, boom.
+ <nlightnfotis> pthread_create_internal is failing at an assertion. I wonder
+ what would happen if I remove that assertion.
+ <tschwinge> Not without understanding what the error is, and why it is
+ happening (which steps lead to it). We don't usually do »voodoo
+ computing and programming by coincidence«.
+ <nlightnfotis> tschwinge, I also figured out something. If it is a
+ libpthread issue, it should also get triggered when a simple C program
+ creates a thread (assuming _pthread_create is causing the issue)
+ <nlightnfotis> so maybe I should write a C program to test that
+ functionality and see if it provides any further clues?
+ <tschwinge> nlightnfotis: That's precisely what the goal of »isolate the
+ steps required to trigger the assertion failure« is about: reduce the big
+ libgo code to a few function calls required to reproduce the problem.
+ <tschwinge> nlightnfotis: A simple C program just doing pthread_create
+ evidently does not fail.
+ <tschwinge> nlightnfotis: I assume you have a Go program dynamically linked
+ to the libgo you build?
+ <nlightnfotis> yes. To the latest go build from the source (4.9)
+ <nlightnfotis> *gccgo build from source
+ <braunr> removing an assertion is usually extremely bad practice
+ <tschwinge> Then you can just do something like make target-libgo (IIRC)
+ (or instead: cd i686-pc-gnu/libgo/ && make) to rebuild your changed
+ libgo, and then re-run the Go program.
+ <braunr> the thought of randomly removing assertions shouldn't even reach
+ your mind !
+ <nlightnfotis> braunr: even if it is not permanent, but an experiment?
+ <braunr> yes
+ <nlightnfotis> can you explain to me why?
+ <tschwinge> nlightnfotis: <tschwinge> Not without understanding what the
+ error is, and why it is happening (which steps lead to it). We don't
+ usually do »voodoo computing and programming by coincidence«.
+ <braunr> an assertion exists to make sure something that should *never*
+ happen never happens
+ <braunr> removing it allows such events to silently occur
+ <teythoon> braunr: that's the theory, yes, to check invariants
+ <braunr> i don't know what you mean by using assertions for "an experiment"
+ <teythoon> unfortunately some people use assert for error handling :/
+ <braunr> that's wrong
+ <braunr> and i don't remember it to be the case in libpthread
+ <braunr> nlightnfotis: can you point the faulting assertion again there
+ please ?
+ <nlightnfotis> braunr: sure: Assertion `({ mach_port_t ktid =
+ __mach_thread_self (); int ok = thread->kernel_thread == ktid;
+ <nlightnfotis> __mach_port_deallocate ((__mach_task_self + 0), ktid); ok;
+ })' failed.
+ <braunr> so basically, thread->kernel_thread != __mach_thread_self()
+ <braunr> this code is run only for num_threads == 1
+ <braunr> but has there been any thread destruction before ?
+ <nlightnfotis> no. To my understanding kernel threads in the go runtime
+ never get destroyed (comments seem to support that)
+ <braunr> IOW: is it certain the only thread left *is* the main thread ?
+ <braunr> hm
+ <braunr> intuitively, i'd say this is wrong
+ <braunr> i'd say go doesn't destroy threads in most cases, but something in
+ the go runtime must have done it already
+ <braunr> i'm not even sure the main thread still exists
+ <braunr> check that
+ <braunr> where is the go code you're working on ?
+ <nlightnfotis> there are 3 files of interest
+ <braunr> i'd like the whole sources please
+ <nlightnfotis> I will find it in a moment
+ <tschwinge> braunr: GCC Git clone, tschwinge/t/hurd/go branch.
+ <nlightnfotis> it is <gcc_root>/libgo/runtime/runtime.h
+ <nlightnfotis> it is <gcc_root>/libgo/runtime/proc.c
+ <braunr> tschwinge: thanks
+ <tschwinge> braunr: git://gcc.gnu.org/git/gcc.git
+ <nlightnfotis> I will provide links on github
+ <braunr> nlightnfotis: i said the whole sources, why do you insist on
+ giving me separate files ?
+ <nlightnfotis> for checking it out quickly
+ <nlightnfotis> oh I misunderstood that sorry
+ <nlightnfotis> thought you wanted to check out thread creation and
+ destruction and that you were interested only in those specific files
+ <braunr> tschwinge: is it completely contained there or are there external
+ libraries ?
+ <tschwinge> braunr: You mean libgo?
+ <braunr> tschwinge: possibly
+ <nlightnfotis> tschwinge, I just made sure that yeah programs are
+ dynamically linked against the compiler's libgo
+ <nlightnfotis> libgo.so.3
+ <braunr> does libgo come from gcc sources ?
+ <nlightnfotis> yeah
+ <braunr> ok
+ <nlightnfotis> go files on gcc sources are split under two directories: go,
+ which contains the frontend go, and libgo which contains the libraries
+ and the runtime code
+ <tschwinge> braunr: darnassus:~tschwinge/tmp/gcc/go.build/ is a recent
+ build, with sources in $PWD/../go/.
+ <tschwinge> braunr: libgo is in i686-unknown-gnu0.3/libgo/.libs/
+ <nlightnfotis> so tschwinge to round up for this week I should print debug
+ around the "hotspots" and see if I can extract more information about
+ where the specific problem is triggered right?
+ <tschwinge> nlightnfotis: Yes, for a start.
+ <braunr> nlightnfotis: identify the main thread, make sure it doesn't exit
+ <nlightnfotis> noted.
+ <nlightnfotis> braunr: do you have an idea about the issue I described
+ earlier? The one with the 1 goroutine triggering the bug, but the 2
+ exiting successfully but with no output?
+ <braunr> nlightnfotis: i didn't read
+ <nlightnfotis> do you have 2 mins to read my report? I describe the issue
+ <braunr> something messed up in the context i suppose
+ <tschwinge> nlightnfotis: Uhm, I already explained that issue?
+ <braunr> you did ?
+ <nlightnfotis> tschwinge, I know, don't worry. I am trying to get all the
+ insight I can get.
+ <nlightnfotis> you mentioned that the scheduler might have an issue and
+ that the main thread returns before the goroutines execu
+ <nlightnfotis> *execute
+ <nlightnfotis> right?
+ <tschwinge> It is the normal thing for a process to terminate normally when
+ the main function returns. I would expect Go to behave the same way.
+ <braunr> "Now, if we change one of the say functions inside main to a
+ goroutine, this happens"
+ <braunr> how do you change it ?
+ <tschwinge> Or am I confused?
+ <braunr> tschwinge: i don't remember exactly
+ <nlightnfotis> braunr: from say("world") to go say("world")
+ <nlightnfotis> tschwinge, yeah I get that. What I still have not understood
+ is what is it specifically about the 2 goroutines that doesn't trigger
+ the issue when 1 goroutine does.
+ <nlightnfotis> You said that it might have something to do with the
+ scheduler; it does seem like a good explanation to me
+ <tschwinge> nlightnfotis: My understanding still is that the goroutines
+ don't get executed before the main thread exits.
+ <braunr> which scheduler ?
+ <nlightnfotis> braunr: the runtime (go) scheduler.
+ <nlightnfotis> tschwinge, Yeah, they don't. But still, with 1 goroutine:
+ you get into main, attempt to execute it, and bam! With two, it should be
+ the same, but strangely it seems to exit main without an issue
+ <nlightnfotis> (attempt to execute the goroutine)
+ <braunr> why should it be the same ?
+ <nlightnfotis> braunr: seeing as one goroutine has problems, I can't see
+ why two wouldn't. At least one of the two should result in an exception.
+ <braunr> nlightnfotis: why ?
+ <braunr> nlightnfotis: they do have the problem
+ <braunr> they don't run
+ <braunr> they just don't run into that assertion, probably because there is
+ more than one thread
+ <nlightnfotis> wait a minute. You imply that they fail silently? But still
+ end up in the same situation
+ <braunr> yes
+ <braunr> in which case it does look like a go scheduler problem
+ <nlightnfotis> if I understood it correctly, that assertion fails when there
+ is only 1 thread?
+ <braunr> yes
+ <braunr> and since the main thread is always correct, i expect the main
+ thread has exited
+ <braunr> which happens because the one thread left is *not* the main
+ thread
+ <braunr> (which is a libpthread bug)
+ <braunr> but it's a bug we've not seen because we don't have applications
+ creating threads while exiting
+ <nlightnfotis> I think I got it now.
+ <braunr> try to put something like getchar() in your go program
+ <braunr> something that introduces a break
+ <braunr> so that the main thread doesn't exit
+ <nlightnfotis> oh right. Thanks for that. And sorry tschwinge I reread what
+ you said, it seems I had misinterpreted what you suggested.
+ <tschwinge> braunr: If you're interested: for a Go program triggering the
+ assertion, I don't see any thread exiting (see
+ darnassus:~tschwinge/tmp/gcc/a.go, run: cd ~tschwinge/tmp/gcc/go.build/
+ && ./a.out) -- but perhaps I've been looking for the wrong things in l_.
+ File l is without a goroutine. Have to leave now, sorry.
+ <tschwinge> braunr: If you want to rebuild: gcc/gccgo -B gcc -B
+ i686-unknown-gnu0.3/libgo ../a.go -Li686-unknown-gnu0.3/libgo/.libs
+ -Wl,-rpath,i686-unknown-gnu0.3/libgo/.libs
+ <braunr> tschwinge: no i won't touch anything
+ <braunr> but thanks
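+
+The test program under discussion appears to be a variant of the classic
+two-say example from the Go tour; the sketch below is a reconstruction for
+reference, not the exact code from the report. It also illustrates
+tschwinge's point: if both calls become goroutines, main returns at once and
+the process can terminate before either goroutine is scheduled.
+
+    package main
+
+    import (
+        "fmt"
+        "time"
+    )
+
+    func say(s string) {
+        for i := 0; i < 5; i++ {
+            time.Sleep(100 * time.Millisecond)
+            fmt.Println(s)
+        }
+    }
+
+    func main() {
+        go say("world") // one goroutine: the case that hits the assertion
+        say("hello")    // as `go say("hello")`, main returns immediately instead
+    }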
+
+
+# IRC, freenode, #hurd, 2013-08-19
+
+ <youpi> nlightnfotis: how are you going with gcc go?
+ <nlightnfotis> I was print debugging all week.
+ <nlightnfotis> I can tell you I haven't noticed anything weird so far.
+ <nlightnfotis> But I feel I am close to the solution
+ <nlightnfotis> I have not written my report yet.
+ <nlightnfotis> I will write it by Wednesday at the latest
+ <nlightnfotis> I hope I will have figured it all out by then
+ <pinotree> a report is not for writing solutions, but for reporting progress
+ <youpi> yes
+ <youpi> it's completely fine to be saying "I've been debugging, not found
+ anything yet"
+ <pinotree> results or not, always write your reports on time, so your
+ mentor(s) know what you are doing
+ <nlightnfotis> I see. Would you like me to write it right now, or is it
+ okay to write it a day or two later?
+ <hacklu__> nlightnfotis: FYI. this week my report is not finished. it just
+ states some problems I face now.
+ <youpi> nlightnfotis: I'd say better write it now
+ <nlightnfotis> youpi: Ok I will write it and tell you when I am done with
+ it.
+ <nlightnfotis> youpi: here is my partial report describing what my course
+ of action looked like this
+ week. http://www.fotiskoutoulakis.com/blog/2013/08/19/gsoc-week-9-partial-report/
+ <nlightnfotis> of course, I will write in a day or two (hopefully having
+ figured out the whole situation) an exhaustive report describing
+ everything I did in detail
+ <nlightnfotis> youpi: I have written my (partial) report describing how I
+ went about this week
+ http://www.fotiskoutoulakis.com/blog/2013/08/19/gsoc-week-9-partial-report/
+ <youpi> nlightnfotis: good, thanks!
+ <nlightnfotis> youpi: please note that this is not an exhaustive account of my
+ findings or course of action, it merely acts as an example to demonstrate
+ the way I think and how I go about every day.
+ <nlightnfotis> I will write an exhaustive report of everything I did so
+ far, when I figure out what the issue is, and I feel I am close.
+ <youpi> well, you don't need to explain all bits in details
+ <youpi> it's fine to show an example of how you went about it
+ <youpi> but please also provide a summary of your other findings
+ <nlightnfotis> oh okay, I will keep this in mind. :)
+
+
+# IRC, freenode, #hurd, 2013-08-22
+
+ < nlightnfotis> if I want to rebuild libpthread, I have to embed it into
+ eglibc's source, then build?
+ < pinotree> or pick the debian sources, patch libpthread there and rebuild
+ < nlightnfotis> that's most likely what I am going to do. Thanks pinotree.
+ < pinotree> yw
+ < braunr> nlightnfotis: i usually add my patches on top of the debian glibc
+ ones, yes
+ < braunr> it requires some tweaking
+ < braunr> but it's probably the easiest way
+ < nlightnfotis> braunr: I was studying my issues with gcc, and every day I
+ was getting more and more confident it must be a libpthread issue
+ < nlightnfotis> and I figured out that I might wanna play with libpthread
+ this time
+ < braunr> it probably is but
+ < braunr> i'm not so sure you should dive there
+ < nlightnfotis> why not?
+ < braunr> because it can be worked around in go
+ < braunr> i had a test for you last time
+ < braunr> do you remember what it was ?
+ < nlightnfotis> nope :/ care to remind it?
+ < braunr> iirc, it was running the go test you did but with an additional
+ instruction in the main function, that pauses
+ < braunr> something like getchar() in c
+ < braunr> to make sure main doesn't exit while the goroutines are still
+ running
+ < braunr> i'm almost positive that the bug you're seeing is main returning
+ and libpthread believing it's acting on the main thread because there is
+ only one left
+ < nlightnfotis> oh that's easy, I can do it now. But it's probably what
+ thomas had suggested: go routines may not be running at all.
+ < braunr> they probably aren't
+ < braunr> and that's a context bug
+ < braunr> not a libpthread bug
+ < braunr> and that's what you should focus on
+ < braunr> the libpthread bug is minor
+ < nlightnfotis> which is strange, because I had studied the assembly code
+ and the code for the goroutine was there
+ < nlightnfotis> anyway I will proceed with what you suggested
+ < braunr> yes please
+ < braunr> that's becoming important
+ < nlightnfotis> would you mind me dumping some of my findings for you to
+ evaluate / post an opinion on?
+ < braunr> no
+ < braunr> please do so
+ < nlightnfotis> I have found that the go runtime starts with a total number
+ of threads == 1
+ < braunr> nlightnfotis: as all processes
+ < nlightnfotis> I would guess that's because of using fork ()
+ < nlightnfotis> oh so it's ok
+ < braunr> there always is a main thread
+ < braunr> even for non-threaded applications
+ < nlightnfotis> yeah, that I know. The runtime proceeds to create
+ immediately one more.
+ < braunr> then it's 2
+ < nlightnfotis> and that's ok, it doesn't have an issue with that
+ < nlightnfotis> yep
+ < nlightnfotis> the issue begins when it tries to create the 3rd one
+ < braunr> hum
+ < braunr> from what i remember
+ < nlightnfotis> it happily goes through the go runtime's kernel thread
+ allocation function (runtime_newm())
+ < braunr> you also had an issue with the first goroutine
+ < nlightnfotis> that's with 1 go routine
+ < braunr> ok
+ < braunr> so 1 goroutine == 3 threads
+ < nlightnfotis> it seems so yes.
+ < braunr> depending on how the go scheduler is able to assign goroutines to
+ kernel threads i suppose
+ < nlightnfotis> mind you, (disclaimer: I am not so sure about that) that go
+ must be using one extra thread for the runtime scheduler and garbage
+ collector
+ < braunr> that's ok
+ < nlightnfotis> so that's where the two come from
+ < braunr> and expected from a modern runtime
+ < nlightnfotis> the third must be the go routime
+ < nlightnfotis> routine
+ < braunr> hum have to go
+ < braunr> brb in a few minutes
+ < braunr> keep posting
+ < nlightnfotis> it's ok take your time
+ < nlightnfotis> I will be here
+ < braunr> but i may not ;p
+ < braunr> in fact i will not
+ < braunr> i have like 15 mins ;)
+ < braunr> nlightnfotis: ^
+ < nlightnfotis> I am trying what you told me to do with go
+ < nlightnfotis> it's ok if you have to go, I will continue investigating
+ and be back tomorrow
+ < braunr> ok
+ < nlightnfotis> braunr: I tried what you asked me to do, both with waiting to
+ read a string from stdin and with waiting to read an int from stdin
+ < nlightnfotis> it never waits, it still aborts with the assertion failure
+ < nlightnfotis> both with one and two go routines
+ < nlightnfotis> dumping it here just for the log, running the same code
+ without waiting for input results in two threads created (1 for main and
+ 1 for runtime, most likely) and "normal" execution.
+ < nlightnfotis> normal as in no assertion failure,
+ < nlightnfotis> it seems to skip the goroutines altogether
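+
+The pastebin content is not reproduced in this log; what follows is a
+hypothetical sketch of the variant tried in this session, with a blocking
+read at the end of main (the Go counterpart of braunr's getchar()
+suggestion) so that the main thread cannot return before the goroutine runs.
+
+    package main
+
+    import (
+        "fmt"
+        "time"
+    )
+
+    func say(s string) {
+        for i := 0; i < 5; i++ {
+            time.Sleep(100 * time.Millisecond)
+            fmt.Println(s)
+        }
+    }
+
+    func main() {
+        go say("world")
+        // Block main on stdin so it cannot return before the goroutine has
+        // had a chance to run.
+        var dummy string
+        fmt.Scanln(&dummy)
+    }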
+
+
+# IRC, freenode, #hurd, 2013-08-23
+
+ < braunr> nlightnfotis: can i see your last go test code please ? the one
+ with the read at the end of main
+ < nlightnfotis> braunr sure
+ < nlightnfotis> sorry I had gone to the toilet, now I am back
+ < nlightnfotis> I will send it right now
+ < nlightnfotis> braunr: http://pastebin.com/DVg3FipE
+ < nlightnfotis> it crashes when it attempts to create the 3rd thread (the
+ 1st goroutine), with the assertion fail
+ < nlightnfotis> if you remove the Scanf it will not fail, return 0, but
+ only create 2 threads (skip the goroutines alltogether)
+ < braunr> can you add a print right before main exits please ?
+ < braunr> so we know when it does
+ < nlightnfotis> doing it now
+ < nlightnfotis> braunr: If I enter a print statement right before main
+ exits, the assertion failure is triggered. If I remove it, it still runs
+ and creates only 2 threads.
+ < braunr> i don't understand
+ < braunr> 14:42 < nlightnfotis> it crashes when it attempts to create the
+ 3rd thread (the 1st goroutine), with the assertion fail
+ < braunr> why don't you get that ?
+ < nlightnfotis> This seems like having to do with the runtime. I mean, I
+ have seen the emitted assembly from the compiler, and the goroutines are
+ there. Something in the runtime must be skipping them
+ < braunr> context switching seems buggy
+ < nlightnfotis> if it's only goroutines in main
+ < nlightnfotis> if there's also something else in main, the assertion
+ failure is triggered.
+ < braunr> i want you to add a printf right before main exits, from the code
+ you pasted
+ < nlightnfotis> I did. It acts the same as before.
+ < braunr> do you see that last printf ?
+ < nlightnfotis> no. It aborts before that
+ < nlightnfotis> :q
+ < braunr> find a way to make sure the output buffer is flushed
+ < braunr> i don't know how it's done in go
+ < nlightnfotis> mistyped the :q, was supposed to do it in vim
+ < nlightnfotis> braunr will do right away
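+
+A note on flushing: fmt writes to os.Stdout, which is not buffered by the
+standard library, so a missing final print normally means the statement was
+never reached rather than lost in a buffer. If in doubt, an explicit sync
+can be added as a sanity check; a hypothetical sketch:
+
+    package main
+
+    import (
+        "fmt"
+        "os"
+    )
+
+    func main() {
+        // ... goroutines and the rest of the test program ...
+        fmt.Println("main is about to return")
+        os.Stdout.Sync() // push any pending data to the terminal before exiting
+    }
+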
+ < nlightnfotis> there is one thing I still can not understand: Why is it
+ that two threads are ok, but when the next one is going to get created, the
+ assertion is triggered.
+ < braunr> nlightnfotis: the assertion is triggered because a thread is
+ being created while there is only one thread left, and this thread isn't
+ the main thread
+ < braunr> so basically, the main thread has exited, and another (the last
+ one) is trying to create one
+ < nlightnfotis> the other one might be the runtime I guess. Let me check
+ out quickly what you suggested
+ < braunr> the main thread shouldn't exit at all
+ < braunr> so something with context switching is wrong
+ < nlightnfotis> the thing is: it doesn't seem to exit when this happens. My
+ debug statements (in the runtime) suggest that there are at least 2
+ threads active, kernel threads don't get destroyed in gccgo
+ < braunr> 14:52 < braunr> so something with context switching is wrong
+ < braunr> how well have the context switching functions been tested ?
+ < nlightnfotis> to be honest I have not tested them; up until this point I
+ trusted they worked. Should I also take a look at them?
+ < braunr> how can you trust them ?
+ < braunr> they've never been used ..
+ < braunr> thomas added them recently if i'm right
+ < braunr> nothing has been using them except go
+ < braunr> piece of advice: don't trust anything
+ < nlightnfotis> I think they were in before, and thomas recently patched
+ them!
+ < braunr> they were in, but didn't work
+ < braunr> (if i'm right)
+ < braunr> nlightnfotis: you could patch libpthread to monitor the number of
+ threads
+ < braunr> or the go runtime, idk
+ < nlightnfotis> I have done so on the go runtime
+ < nlightnfotis> that's where I am getting the number of threads I
+ report. That's straight out from the scheduler's count.
+ < braunr> threads can exit by calling pthread_exit() or returning from the
+ thread routine
+ < braunr> make sure you catch both
+ < braunr> also check for pthread_cancel(), although i don't expect any in
+ go
+ < nlightnfotis> braunr: Should I really do that? I mean, from what I can
+ see in gccgo's comments, Kernel threads (m) never go away. They are added
+ to a pool of m's waiting for work if there is no goroutine running on
+ them
+ < nlightnfotis> I mean, I am not so sure they exit at all
+ < braunr> be sure
+ < braunr> point me the code please
+ < nlightnfotis>
+ https://github.com/NlightNFotis/gcc/blob/master/libgo/runtime/proc.c#L224
+ < nlightnfotis> this is where it gets stated that m's never go away
+ < nlightnfotis> and at line 257 you can see the pool
+ < nlightnfotis> and wait for me to find the code that actually releases an
+ m and places it into the pool
+ < nlightnfotis> yep found it
+ < nlightnfotis> line 817 mput
+ < nlightnfotis> puts a kernel thread given as parameter to the pool
+ < nlightnfotis> another proof of the theory is at line 1177. It states:
+ "This point is never reached, because scheduler does not release os
+ threads at the moment."
+ < braunr> fetching git repository, bit busy, i'll have a look in 5-10 mins
+ < nlightnfotis> oh it's ok, I had pointed you to the file directly on
+ github to check it out instantly, but never mind, the file is
+ <gccroot>/libgo/runtime/proc.c
+ < braunr> damn github is so slow ..
+ < braunr> nlightnfotis: i much prefer my own text interface :)
+ < nlightnfotis> braunr: just out of curiosity what's your setup? I use vim
+ mainly (not that I am a vim expert or anything, I only know the basics,
+ but I love it)
+ < braunr> same
+ < braunr> nlightnfotis: add a trace at that comment to make SURE threads do
+ not exit
+ < braunr> you *cannot* get the libpthread assertion with more than 1 thread
+ < braunr> grep for pthread_exit() too
+ < nlightnfotis> will do it now. It will take about an hour to compile
+ though.
+ < braunr> i don't understand the stack trick at the start of runtime_mstart
+ < braunr> ah splitstack ..
+ < nlightnfotis> I think I should try cross compiling gcc, and then move
+ the files onto the hurd. It would be so much faster I believe.
+ < braunr> than what ?
+ < nlightnfotis> building gcc on the hurd
+ < nlightnfotis> I remember it taking about 10minutes with make -j4 on the
+ host
+ < nlightnfotis> it takes 45-50 minutes on the vm (kvm enabled)
+ < braunr> but you can merely rebuild the files you've changed
+ < nlightnfotis> I feel stupid now...
+ < braunr> nlightnfotis: have you tried setting GOMAXPROCS to 1 ?
+ < nlightnfotis> not really, but from what I know GOMAXPROCS defaults to 1
+ if not set
+ < braunr> again, check that
+ < braunr> take the habit of checking things
+ < nlightnfotis> braunr: yeah sorry for that. I have checked these things
+ out before; they don't come out of my head, I just don't remember exactly
+ where I had seen this
+ < braunr> what you can also do is use gdb to catch the assertion and check
+ the number of threads at that time, as well as the number of threads as
+ seen by libpthread
+ < nlightnfotis> braunr: line 492 file proc.c: runtime_gomaxprocs = 1;
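+
+Rather than relying on the documented default, the setting can also be
+checked from a Go program itself, assuming the gccgo runtime package of that
+era exposes the usual calls; a small sketch:
+
+    package main
+
+    import (
+        "fmt"
+        "runtime"
+    )
+
+    func main() {
+        // GOMAXPROCS(n) with n < 1 only reports the current setting.
+        fmt.Println("GOMAXPROCS:", runtime.GOMAXPROCS(0))
+        fmt.Println("goroutines:", runtime.NumGoroutine())
+    }
+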
+ < braunr> also see runtime.LockOSThread
+ < braunr> to make sure the main thread is locked to its own pthread
+ < nlightnfotis> I can see in line 529 of the same file that the first
+ thread is getting locked
+ < nlightnfotis> the new threads that get initialised are non main threads
+ < braunr> if(!runtime_sched.lockmain) runtime_UnlockOSThread();
+ < braunr> i'm suggesting you set runtime_sched.lockmain
+ < braunr> so it remains true for the whole execution
+ < braunr> this code looks like a revamp of plan9 lol
+ < nlightnfotis> it is
+ < nlightnfotis> in the paper from Ian Lance Taylor describing gccgo he
+ states somewhere that the original go compilers (the 3gs) are a modified
+ version of plan9's C compiler, and that gccgo tries to follow them
+ < nlightnfotis> they differ in a lot of ways though
+ < nlightnfotis> the 3gs generate a lot of code during link time
+ < nlightnfotis> gccgo follows the standard gcc procedures
+ < braunr> eh :D
+ < nlightnfotis> go -> gogo -> generic -> gimple -> rtl -> object
+ < nlightnfotis> that's how it flows as far as I recall
+ < nlightnfotis> gogo is an internal representation of go's structure inside
+ the gccgo frontend
+ < nlightnfotis> that's why you see many functions with gogo in their name
+ < nlightnfotis> I just revisited the paper: gogo is there to make it easy
+ to implement whatever analysis might seem desirable. It mirrors however
+ the Go source code read from the input files
+ < braunr> nlightnfotis: what are you trying now ?
+ < nlightnfotis> I am basically studying the runtime's source code while
+ waiting for gccgo to compile on the Hurd
+ < nlightnfotis> yes I did the stupid whole recompilation again. :/
+ < braunr> nlightnfotis: compile for what ?
+ < braunr> what test ?
+ < nlightnfotis> to check out to see if M's really are added to the pool
+ instead of getting deleted
+ < braunr> nlightnfotis: but how ?
+ < nlightnfotis> braunr: I have added a statement in mput to report, first,
+ whether we get there, and secondly the number of threads that the runtime
+ scheduler knows are waiting (are in the pool of m's waiting for work)
+ < braunr> ok
+ < braunr> when you can, i'd really like you to do this test :
+ < braunr> 15:55 < braunr> what you can also do is use gdb to catch the
+ assertion and check the number of threads at that time, as well as the
+ number of threads as seen by libpthread
+ < nlightnfotis> getting the number of threads as seen by libpthread is
+ gonna need me to recompile the whole eglibc, right?
+ < braunr> no
+ < braunr> just print it with gdb
+ < nlightnfotis> oh, ok
+ < braunr> it's __pthread_num_threads
+ < nlightnfotis> is gdb reliable? I remember thomas telling me that I can't
+ trust gdb at this point in time
+ < braunr> and also __pthread_total
+ < braunr> really ?
+ < braunr> i don't see why not :/
+ < braunr> youpi: any idea about what nlightnfotis is speaking of ?
+ < nlightnfotis> I may have misunderstood it; don't take it by heart
+ < nlightnfotis> I don't wanna put words in other people's mouths because I
+ misunderstood something
+ < braunr> sure
+ < braunr> that's my habit to check things
+ < youpi> braunr: nope
+ < braunr> youpi: and am i right when i say we don't use context functions
+ on the hurd, and they're likely to be incomplete, even with the recent
+ changes from thomas ?
+ < braunr> (mcontext, ucontext)
+ < nlightnfotis> braunr: this is what had been said: 08:46:30< tschwinge> As
+ this is a parallel problem, and it is involving "advanced" things (such
+ as setcontext), I would not trust GDB too much when used on this code.
+ < pinotree> if thomas' changes were complete and polished, i guess he would
+ have sent them upstream already
+ < braunr> i see but
+ < braunr> you can normally trust gdb for global variables
+ < nlightnfotis> Didn't post it as an objection; I posted it because I felt
+ bad putting the wrong words in other people's mouths, as I said
+ before. So I posted his original comment which was more authoritative
+ than my interpretation of it
+ < braunr> i wonder if there is a tunable to strictly map one thread to one
+ goroutine
+ < braunr> nlightnfotis: more focus on the work, less on the rest please
+ < nlightnfotis> Did I do something wrong?
+ < braunr> you waste too much time apologizing
+ < braunr> for no reason
+ < braunr> nlightnfotis: i suppose you don't use splitstack, right ?
+ < nlightnfotis> no I didn't
+ < nlightnfotis> and here's something interesting: The code I just added, in
+ mput, to see if threads are added in the pool. It's not there, no matter
+ what I run
+ < nlightnfotis> So it seems that the runtime is not reaching mput.
+ < nlightnfotis> Could this be normal behavior? I mean, on process
+ termination just release the resources so mput is skipped?
+ < braunr> i don't know the code well enough to answer that
+ < braunr> check closer to the lower interface
+
+
+# IRC, freenode, #hurd, 2013-08-25
+
+ < nlightnfotis> braunr: what is initcontext supposed to be doing?
+ < braunr> nlightnfotis: didn't look
+ < braunr> i'll take a look later
+ < nlightnfotis> braunr: I am baffled by it. It seems to be doing nothing on
+ the Hurd branch and nothing in the Linux branch either. Why call a
+ function that does nothing? (it doesn't only seem to do nothing, I have
+ confirmed it)
+ < nlightnfotis> youpi: I was wondering if you could explain something to
+ me. What is the initcontext function supposed to be doing?
+ < youpi> you mean initcontext ?
+ < nlightnfotis> yes
+ < youpi> ergl
+ < youpi> you mean makecontext?
+ < nlightnfotis> no initcontext. I am faced with this in the goruntime. It's
+ called in it, but it is doing nothing. Neither in the Hurd tree, nor in
+ the Linux one
+ < youpi> I don't know what initcontext is
+ < youpi> where do you read it?
+ < nlightnfotis> youpi: let me show you
+ < nlightnfotis>
+ https://github.com/NlightNFotis/gcc/blob/fotisk/goruntime_hurd/libgo/runtime/proc.c#L80
+ < nlightnfotis> and it is called in quite a few places
+ < youpi> it's not doing nothing, see other implementations
+ < pinotree> if SETCONTEXT_CLOBBERS_TLS is not defined, initcontext and
+ fixcontext do nothing
+ < pinotree> otherwise (presumably, if setcontext clobbers tls) there are two
+ implementations for solaris/x86_64 and netbsd
+ < youpi> I don't think we have the tls clobber bug
+ < youpi> so these functions being empty is completely fine
+ < nlightnfotis> pinotree: oh, you mean it's used as a workaround for these
+ two systems only?
+ < youpi> yes
+ < pinotree> yes
+ < nlightnfotis> That makes sense. Thanks both of you for the help :)
+ < nlightnfotis> youpi: if this counts as some progress, I have traced the
+ exact bootstrapping sequence of a new go process. I know a good deal of
+ what is done from its spawn to its end. There are some things I wanna
+ sort out, and later tonight I will write my report for it to be ready for
+ tomorrow.
+ < youpi> good
+
+
+# IRC, freenode, #hurd, 2013-08-26
+
+ < nlightnfotis> Hi everyone, my report is here
+ http://www.fotiskoutoulakis.com/blog/2013/08/26/gsoc-week-10-report/
+ < youpi> nlightnfotis: you should clearly put printfs inside libpthread
+ < youpi> to check what is happening with the ktids
+ < nlightnfotis> youpi: yep, that's my next course of action. I just want to
+ spend some more time in the go runtime to make sure that I understand the
+ flow perfectly, and to make sure that it is not the runtime's fault
+ < braunr> nlightnfotis: did you try gdb to print the number of threads ?
+ < youpi> nlightnfotis: to build it, the easiest way is to start building
+ eglibc, and when you see it compiling C files (i.e. running i486-gnu-gcc-4.7
+ etc.)
+ < youpi> stop it
+ < youpi> and go into build/hurd-i386-libc, and run "make others" from there
+ < nlightnfotis> braunr: that was my plan for today or tomorrow :)
+ < braunr> start building *debian* glibc
+ < youpi> there's perhaps some way to only build libpthread, but I don't
+ remember
+ < braunr> nlightnfotis: ok
+ < braunr> youpi: i suggested he tried gdb first
+ < youpi> why not
+ < braunr> if you need quick glibc builds, you can use darnassus
+ < nlightnfotis> braunr: how much time on average should I expect it to
+ take?
+ < youpi> it highly depends on the machine
+ < youpi> it can be hours
+ < youpi> or a few minutes
+ < youpi> depending you already have a built tree, a fast disk, etc.
+ < braunr> make lib others on darnassus takes around 30 minutes
+ < braunr> a complete dpkg-buildpackage from fresh sources takes 5-6 hours
+ < braunr> make others from a built tree is very quick
+ < braunr> a few minutes at most
+ < braunr> nlightnfotis: i don't see any trace of thread exiting in your
+ report, is that normal ?
+ < nlightnfotis> yeah, I guess, since they don't exit prematurely, they are
+ released along with other resources at the process' exit
+ < braunr> i'll rephrase
+ < braunr> you said last time that you saw a function never got called
+ < braunr> i assumed it was because a thread exited prematurely
+ < nlightnfotis> oh I sorted it out with the help of youpi and pinotree
+ yesterday
+ < braunr> that's different
+ < braunr> i'm not talking about the function that does nothing
+ < braunr> i'm talking about the one never called
+ < nlightnfotis> oh, go on then,
+ < braunr> i don't remember its name
+ < braunr> anyway
+ < nlightnfotis> abort()?
+ < braunr> i hope abort doesn't get called :)
+ < nlightnfotis> it doesn't
+ < braunr> i thought it was the one right before
+ < braunr> what i mean is
+ < nlightnfotis> oh runtime_mstart, it does get called
+ < braunr> add traces at thread exit points
+ < nlightnfotis> I sorted it out too
+ < braunr> make *sure* threads don't exit
+ < nlightnfotis> it gets called to start the kernel thread created at
+ process spawn at the runtime_schedinit
+ < braunr> if they really don't, it's probably a context/tls issue
+ < nlightnfotis> I will do this right now.
+ < nlightnfotis> braunr: if it's a context/tls issue it's libpthread's
+ problem?
+
+
+# IRC, freenode, #hurd, 2013-09-02
+
+ <nlightnfotis> Hello! My report for this week is online:
+ http://www.fotiskoutoulakis.com/blog/2013/09/02/gsoc-week-11-report/
+ <braunr> nlightnfotis: there always is a signal thread in every hurd
+ program
+ <braunr> nlightnfotis: i also pointed out that there are two variables
+ involved in counting threads in libpthread, the other one being
+ __pthread_num_threads
+ <braunr> again, more attention to work and details, less showmanship
+ <braunr> i'm tired of repeating it
+ <youpi> nlightnfotis: doesn't backtrace work in gdb to tell you what
+ 0x01da48ec is?
+ <youpi> also, do you have libc0.3-dbg installed?
+ <nlightnfotis> braunr: __pthread_num_threads reports 4.
+ <braunr> then why isn't it in your report ?
+ <braunr> it's acceptable that you overlook it
+ <nlightnfotis> and youpi: yeah I have got the backtrace, but 0x01da48ec is
+ ?? () from /lib/i386-gnu/libc.so.0.3
+ <braunr> it's NOT when someone else has previously mentioned it to you
+ <youpi> nlightnfotis: only that line, no other line?
+ <nlightnfotis> it has 8 more youpi, the one after ?? is mach_msg ()
+ from /lib/i386-gnu/libc.so.0.3
+ <braunr> yes mach_msg
+ <braunr> almost everything ends up in mach_msg
+ <youpi> you should probably pastebin somewhere the output of thread apply
+ all bt
+ <braunr> what's before that ?
+ <nlightnfotis> braunr: I don't know how I even missed it. I skimmed through
+ the code and only found __pthread_total and assumed that it was the total
+ number of threads
+ <braunr> nlightnfotis: i don't know either
+ <braunr> take notes
+ <nlightnfotis> before mach_msg is __pthread_timedblock () from
+ /lib/i386-gnu/libpthread.so.0.3
+ <nlightnfotis> I will add it to pastebin in a second
+ <braunr> i find it very disappointing that after several weeks blocking on
+ this, despite all the pointers you've been given, you still haven't made
+ enough progress to reach the context switching functions
+ <braunr> last week, most progress was made when we talked together
+ <braunr> then nothing
+ <braunr> it seems that you disappear, apparently searching on your own
+ <braunr> but for far too long
+ <nlightnfotis> braunr: I do search on my own, yes,
+ <braunr> almost like exploiting being blocked not to make progress on
+ purpose ...
+ <braunr> but too much
+ <nlightnfotis> braunr: I am not doing this on purpose, I believe you are
+ unfair to me. I am trying to make as much progress as I can alone, and
+ reach out only when I can't do much more alone
+ <braunr> then why is it only now that we get replies to questions such as
+ "how much is __pthread_num_threads" ?
+ <braunr> why do you stop discussions for almost a week, just to find
+ yourself blocked again ?
+ <nlightnfotis> I was working on gcc, going through the runtime making sure
+ about assumptions and going through various other programs (goroutine or
+ not) with gdb
+ <braunr> that doesn't take a week
+ <braunr> clearly not
+ <braunr> last time we talked was
+ <braunr> 10:40 < nlightnfotis> braunr: if it's a context/tls issue it's
+ libpthread's problem?
+ <nlightnfotis> it did for me... honestly, what is it you believe I am doing
+ wrong? I too am frustrated by my lack of progress, but I am doing my best
+ <braunr> august 26
+ <nlightnfotis> yeah, I wanted to make sure about certain assumptions on the
+ gcc side. I don't want to start hacking on libpthread only to see that it
+ might have been something I missed on the gcc side
+ <braunr> i told you
+ <braunr> it's probably not a libpthread issue
+ <braunr> the assertion is
+ <braunr> but it's minor
+ <braunr> it's not the real problem, only a side effect
+ <braunr> i told you about __pthread_num_threads, why didn't you look at it
+ ?
+ <braunr> i told you about context switching functions, why nothing about it
+ ?
+ <braunr> doing a few printfs to check numbers and using gdb to check them
+ at break points should be quick
+ <braunr> when we talked we had the results in a few minutes
+ <nlightnfotis> yeah, because I was guided, and that helped me target my
+ research. On my own things are quite different. I find out something
+ about gcc's behavior, then find out I need tons more information, and I
+ have a lot of things that I need to research to confirm any assumptions
+ from my side
+ <braunr> how did you miss the signal thread ?
+ <braunr> we even talked about it right here with hacklu
+ <braunr> i'll say it again
+ <braunr> if blocked more than one day, ask for help
+ <braunr> 2 days minimum each time is just too long
+ <nlightnfotis> I'm sorry. I will be online every day from now on and report
+ every 10 minutes, on my course of actions.
+ <nlightnfotis> I recognise that time is of the essence at this point in
+ time
+ <braunr> it's also NO
+ <braunr> NO
+ <braunr> *SIGH*
+ <hacklu> nlightnfotis: calm down. braunr just want to help you solve
+ problem quickly.
+ <braunr> 10 minutes is the other extreme
+ <hacklu> nlightnfotis: in my experience, if something blocks me, I will
+ keep asking him until I solve the problem.
+ <braunr> it's also very frustrating to see you answer questions quickly
+ when you're here, then wait days for unanswered questions that could have
+ taken little time if you kept being here
+ <braunr> this just gives the impression that you're doing something else in
+ parallel that keeps you busy
+ <braunr> and comforts me in believing you're not being serious enough
+ about it
+ <nlightnfotis> yeah, I understand that it gives that impression. The only
+ thing I can tell you now, is that I am *not* doing something else in
+ parallel. I am only trying to demonstrate some progress alone, and when
+ working alone things for me take quite some more time than when I am
+ guided
+ <braunr> hacklu: i'm actually the nervous one here
+ <nlightnfotis> braunr: ok, I understand I have disappointed you. What would
+ you suggest me to do from now on?
+ <hacklu> braunr: :)
+ <braunr> manage your time correctly or you'll fail
+ <braunr> i'm not the main mentor of this project so it's not for me to
+ decide
+ <braunr> but if i were, and if i had to wait again for several days before
+ any notice of progress or blocking, i wouldn't even wait for the end of
+ the gsoc
+ <braunr> you're confronted with difficult issues
+ <braunr> tls, context switching, thread
+ <braunr> ing
+ <braunr> they're all complicated
+ <braunr> unless you're very experienced and/or gifted, don't assume you can
+ solve it on your own
+ <braunr> and the biggest concern for me is that it's not even the main
+ focus of your project
+ <braunr> you should be working on go
+ <braunr> on porting
+ <braunr> any side issues should be solved as quickly as possible
+ <braunr> and we're now in september ...
+ <nlightnfotis> go is working quite alright. It's goroutines that have
+ issues.
+ <braunr> nlightnfotis: same thing
+ <braunr> goroutines are part of go as far as i'm concerned
+ <braunr> and they're working too, something in the hurd isn't
+ <braunr> so it's a side issue
+ <braunr> you're very much entitled to ask as much help as you need for side
+ issues
+ <braunr> and i strongly feel you didn't
+ <nlightnfotis> yeah, you're right. I failed on that aspect, mainly because
+ of the way I work. I wanted to show some progress on my own, and not be
+ here and spam all day. I felt that spamming questions all day would
+ demonstrate incompetence from my side
+ <nlightnfotis> and I wanted to show that I am capable of solving my
+ problems on my own.
+ <braunr> well, in a sense it does, but that's not the skills we were
+ expecting from you so it's perfectly ok
+ <braunr> nlightnfotis: no development group, even in companies, in their
+ right mind, would expect you to grasp the low level dark details of an
+ operating system implementation in a few weeks ...
+ <nlightnfotis> braunr: ok, may I ask what you suggest to me that my next
+ course of action is?
+ <braunr> let me see
+ <braunr> nlightnfotis: your report mentions runtime_malg
+ <nlightnfotis> yes, runtime_malg always returns a new goroutine
+ <braunr> nlightnfotis: what's the problem ?
+ <nlightnfotis> a newly created m is assigned a new goroutine via runtime_malg
+ <nlightnfotis> what happens to that goroutine? Is it destroyed? Because it
+ seems to be a bogus goroutine. Why isn't the kernel thread instantly
+ picking the one goroutine available at the global goroutine pool?
+ <braunr> let's see if it's that hard to figure out
+ <nlightnfotis> seeing as m's and g's have a 1:1 (in gccgo) relationship,
+ and a new kernel thread is created every time there is a new goroutine
+ there to run.
+ <braunr> are you sure about that 1:1 relationship ?
+ <braunr> i hardly doubt it
+ <braunr> highly*
+ <nlightnfotis> yeah, that's what I thought too, but then again, my research
+ so far shows that when a new goroutine is created, a new kernel thread
+ creation follows suit
+ <nlightnfotis> what I have mentioned of course, happens in runtime_newm
+ <braunr> nlightnfotis: that's when you create a new m, not a new g
+ <nlightnfotis> yes, a new m is created when you create a new g. My issue is
+ that during m's creation, a new (bogus) g is created and assigned to the
+ m. I am looking into what happens to that.
+ <braunr> nlightnfotis: "a new m is created when you create a new g", can
+ you point me to the code ?
+ <nlightnfotis> braunr: matchmg line 1280 or close to that. Creates new m's
+ to run new g's up to (mcpumax)
+ <braunr> "Kick off new m's as needed (up to mcpumax)."
+ <braunr> so basically you have at most mcpumax m
+ <nlightnfotis> yeah. but for a small number of goroutines (as for example
+ in my experiments), a new m is created in order to run a new g.
+ <braunr> runtime_newm is called only if mget(gp)) == nil
+ <braunr> be rigorous please
+ <braunr> when i ask
+ <braunr> 11:01 < braunr> are you sure about that 1:1 relationship ?
+ <braunr> this conclusively proves it's *false*
+ <braunr> so don't answer yes to that
+ <braunr> it's true for a small number of goroutines, ok
+ <braunr> and at startup
+ <braunr> because then, mget returns an existing m
+ <braunr> nlightnfotis: this g0 goroutine is described in the struct as
+ <braunr> G runtime_g0; // idle goroutine for m0
+ <braunr> runtime_malg builds it with just a stack
+ <braunr> apparently, that's the goroutine an m runs when there are no g
+ left
+ <braunr> so yes, the idle one
+ <braunr> it's not bogus
+ <nlightnfotis> I thought m0 and g0 were the bootstrap m and g for the
+ scheduler.
+ <nlightnfotis> *correction: runtime_m0 and runtime_g0
+ <braunr> hm i got a bit fast
+ <braunr> G* g0; // goroutine with scheduling stack
+ <nlightnfotis> braunr: scheduling stack with stacksize = -1?
+ <nlightnfotis> unless it's not used as a parameter
+ <nlightnfotis> let me investigate that
+ <nlightnfotis> yeah now that I am seeing it, it might make sense, if it
+ is using a default stack size, #defined as StackMin
+ <braunr> g0 looks like a placeholder
+ <braunr> i think it's used to reuse switching code when there is only one
+ goroutine involved
+ <braunr> e.g. when starting
+ <braunr> anyway i don't think we should waste too much time with it
+ <braunr> nlightnfotis: try to make a real 1:1 mapping
+ <braunr> that's something else i suggested last time
+ <nlightnfotis> braunr: ok. Where do you suspect the problem lies?
+ <braunr> context switching
+ <nlightnfotis> inside the goruntime?
+ <braunr> in glibc
+ <braunr> try to use runtime.LockOSThread
+ <braunr> http://code.google.com/p/go-wiki/wiki/LockOSThread
+ <braunr> nlightnfotis: http://golang.org/pkg/runtime/ is probably better
+ <nlightnfotis> what exactly do you mean by `use runtime.LockOSThread`?
+ LockOSThread locks the very first m and goroutine as the main threads
+ during process initialisation
+ <nlightnfotis> in proc.c line 565 or something
+ <braunr> i'm not sure it will help, because the problem is likely to occur
+ before even switching to the goroutine that locks its m, but worth trying
+ <braunr> 11:28 < braunr> nlightnfotis: http://golang.org/pkg/runtime/ is
+ probably better
+ <braunr> the first example is specific to GUIs that have requirements on
+ the main thread
+ <braunr> whereas i want every goroutine to run in its own thread
+ <nlightnfotis> I have also noticed that some context switching happens in
+ the goruntime even with a low number of goroutines and kernel threads
+ <braunr> that's expected
+ <braunr> goroutines must be viewed as work units, and ms as worker threads
+ <braunr> everytime a goroutine sleeps, its m should be switching to useful
+ work
+ <braunr> nlightnfotis: i'd make prints (probably using mach_print) of
+ contexts when saved and restored
+ <braunr> and try to see if it makes any sense
+ <braunr> that's not simple to setup but not overly complicated either
+ <braunr> don't hesitate to ask for help
+ <nlightnfotis> from inside glibc, right?
+ <braunr> yes
+ <braunr> well
+ <braunr> no from go
+ <braunr> don't touch glibc from now
+ <braunr> put these prints near calls to makecontext/swapcontext
+ <braunr> and setcontext/getcontext
+ <braunr> well
+ <braunr> you'll be using getcontext i think
+ <nlightnfotis> noted it all. I also have the gdb output you asked me for
+ http://pastebin.com/LdnMQDh1
+ <braunr> i don't see main
+ <nlightnfotis> some notes first: The main thread is the one with id 4, and
+ the output on the top is its backtrace.
+ <braunr> and main.main is run in thread 6
+ <nlightnfotis> Remember that main when it comes to go is in the file
+ go-main.c
+ <braunr> so main becomes runtime_MHeap_Scavenger
+ <nlightnfotis> yeah, main.main is the code of the program, (the one the
+ user wrote, not the runtime)
+ <nlightnfotis> yeah, it becomes a gc thread
+ <nlightnfotis> seeing as runtime_starttheworld reports that there is
+ already one gc thread
+ <braunr> and how much are __pthread_total and __pthread_num_threads for
+ that trace ?
+ <nlightnfotis> they were: __pthread_total = 2, and __pthread_num_threads =
+ 4
+ <braunr> can you paste the assertion again please, just to make sure
+ <nlightnfotis> a.out: ./pthread/pt-create.c:167: __pthread_create_internal:
+ Assertion `({ mach_port_t ktid = __mach_thread_self (); int ok =
+ thread->kernel_thread == ktid;
+ <nlightnfotis> __mach_port_deallocate ((__mach_task_self + 0), ktid); ok;
+ })' failed.
+ <braunr> btw, install the -dbg packages too
+ <nlightnfotis> dbg for which one? gccgo?
+ <braunr> libc0.3
+ <braunr> pthread/pt-create.c:167 is __pthread_sigstate (_pthread_self (),
+ 0, 0, &sigset, 0); here :/
+ <braunr> that assertion should be in __pthread_thread_start
+ <braunr> let's just say gdb is confused
+ <pinotree> braunr: apt-get source eglibc ; cd eglibc-* ; debian/rules patch
+ <braunr> pinotree: i have
+ <braunr> and that assertion can only trigger if __pthread_total is 1
+ <braunr> so let's say it just got to 2
+ <nlightnfotis> it does from very early on in process initialisation
+ <nlightnfotis> let me check this out again
+ <braunr> hm
+ <braunr> actually, both __pthread_total and __pthread_num_threads must be 1
+ <braunr> the context functions might be fine actually
+ <nlightnfotis> braunr: __pthread_num_threads = 2 right from the start of
+ the program
+ <nlightnfotis> 0x01da48ec is in mach_msg_trap
+ <braunr> something happened with libpthreads recently ..
+ <braunr> i can't even start iceweasel
+ <pinotree> braunr: what's the error?
+ <braunr> iceweasel: ./pthread/../sysdeps/generic/pt-mutex-timedlock.c:70:
+ __pthread_mutex_timedlock_internal: Assertion `__pthread_threads' failed.
+
+But not the [[open_issues/libpthread_dlopen]] issue?
+
+ <braunr> considering __pthread_threads is a global variable, this is tough
+ <braunr> i wonder if that's the issue with nlightnfotis's work
+ <braunr> wrong symbol resolution, leading libpthread to consider there is
+ only one thread running
+ <pinotree> try with LD_PRELOAD=/lib/i386-gnu/libpthread.so.0 iceweasel
+ <braunr> same
+ <braunr> maybe the switch to glibc 2.17
+ <braunr> this assertion is triggered by __pthread_self, assert
+ (__pthread_threads);
+ <braunr> __pthread_threads being the array of thread pointers
+ <braunr> so either corrupted (but we hardly changed anything ...) or wrong
+ resolution
+ <braunr> __pthread_num_threads includes the signal thread, __pthread_total
+ doesn't
+ <nlightnfotis> braunr: I recompiled with the libc debugging symbols and I
+ have new information
+ <nlightnfotis> the threads block at mach_msg_trap
+ <braunr> again, almost everything blocks there
+ <braunr> mach_msg is mach ipc, the way hurd system calls are implemented
+ <nlightnfotis> and the next calls (if it didn't block, from what I can see
+ from eip) are mach_reply_port and mach_thread_self
+ <braunr> please paste it
+ <nlightnfotis> yes give me 2 mins plz, brb
+ <braunr> pinotree: looks different for firefox
+ <braunr> it seems it calls pthread_key_create before pthread_create
+ <braunr> something our libpthread doesn't handle correctly
+ <nlightnfotis> braunr: http://pastebin.com/yNbT7nLn
+ <pinotree> braunr: what do you mean?
+ <braunr> pinotree: i mean libpthread needs to be fixed so thread-specific
+ data can be set even without a call to pthread_create
+ <braunr> nlightnfotis: hum, we already knew it was blocking in a semaphore
+ <braunr> nlightnfotis: ok forget the other things i told you to test
+ <braunr> nlightnfotis: track __pthread_total and __pthread_num_threads
+ <braunr> add prints (again, with mach_print) to see when (and why) they
+ change and go back to 1
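+
+A minimal sketch of the kind of tracking asked for here: dump the two
+libpthread counters with mach_print from the code paths of interest.
+The helper name is made up, mach_print is assumed to take a const char *
+(as discussed further down), and the counters are redeclared locally
+because they are internal to libpthread; as noted above,
+__pthread_num_threads includes the signal thread while __pthread_total
+does not.
+
+    #include <stdio.h>
+
+    extern void mach_print (const char *s);   /* gnumach -dbg kernels */
+
+    /* Internal libpthread counters, redeclared for the sketch.  */
+    extern volatile int __pthread_total;
+    extern volatile int __pthread_num_threads;
+
+    static void
+    dump_pthread_counters (const char *where)
+    {
+      char buf[128];   /* stack buffer, no dynamic allocation */
+
+      snprintf (buf, sizeof (buf), "%s: total=%d num=%d\n",
+                where, __pthread_total, __pthread_num_threads);
+      mach_print (buf);
+    }
+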
+ <pinotree> braunr: i see that pthread_key_create uses a mutex which in
+ turn needs _pthread_self(), but shouldn't at least one pthread_create be
+ done (directly by libc for the main thread)?
+ <braunr> pinotree: no :)
+ <braunr> well
+ <braunr> it should have been for the signal thread indeed
+ <braunr> and the signal thread exists
+ <pinotree> and the main thread?
+ <braunr> not the main, no
+ <pinotree> how so?
+ <braunr> a simple test program shows it does indeed work ..
+ <braunr> so this is again another problem in firefox too
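+
+A sketch of the kind of simple test program mentioned here: it uses
+thread-specific data from the main thread without ever calling
+pthread_create, which is roughly the pattern firefox seems to trigger.
+
+    #include <assert.h>
+    #include <pthread.h>
+    #include <stdio.h>
+
+    static pthread_key_t key;
+
+    int
+    main (void)
+    {
+      /* No pthread_create anywhere: only key creation and access
+         from the main thread.  */
+      assert (pthread_key_create (&key, NULL) == 0);
+      assert (pthread_setspecific (key, (void *) 0x2a) == 0);
+      printf ("value in main thread: %p\n", pthread_getspecific (key));
+      return 0;
+    }
+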
+ <nlightnfotis> braunr: I don't think I understand this. I mean how can
+ __pthread_total and __pthread_num_threads turn to 1, when, right before
+ and right after the crash, they have values between 2 and 4?
+ <braunr> how did you get their values "right before" the crash ?
+ <nlightnfotis> I have set a breakpoint to a printing function right before
+ the go statement
+ <nlightnfotis> (right before in this context, in the application code, not
+ the runtime code, but then again, I don't really think they are too far
+ each other)
+ <braunr> well, that's the mystery
+ <nlightnfotis> I am not challenging what you said, I will of course do it,
+ just asking to understand some things
+ <braunr> they may either turn to 1, or there is some mess with symbol
+ resolution leading threads to see a value of 1
+ <nlightnfotis> braunr: ping
+ <teythoon> just ask ;)
+ <nlightnfotis> teythoon: have you used mach_print?
+ <teythoon> no
+ <nlightnfotis> I have some questions about it
+ <teythoon> ask them
+ <nlightnfotis> I was told to use them inside go's runtime, to print the
+ values of __pthread_total and __pthread_num_threads. The thing is, these
+ values (I believe) are unknown to the runtime, they are only known to the
+ executable (linking time and later)
+ <teythoon> so? if the requested information is bound to a symbol that is
+ resolved at link time, you can print it from within the runtime
+ <teythoon> the same way any function from the libc is not known to the
+ executable until linking against it, but you can still "use" it in your
+ executable
+ <nlightnfotis> yeah, ok I understand that, but these are references that
+ are resolved at link time. The values I want to print are totally unknown
+ to the runtime (0 references to them)
+ <teythoon> if the value you are interested in is bound to the symbol
+ __pthread_total at link time, then you've got a reference you can use
+ <teythoon> doesn't printing __pthread_total work? did you try that?
+ <nlightnfotis> no, whenever I printed these values I did it from gdb. I am
+ trying to do what you suggested atm
+ <braunr> nlightnfotis: im here
+ <braunr> printing those values from libgo will tell us what value libgo
+ actually sees
+ <nlightnfotis> I am trying to use mach_print. Could you give me some
+ pointers on its usage (inside the goruntime?) (I have already read your
+ document here
+ http://www.gnu.org/software/hurd/microkernel/mach/gnumach/interface/syscall/mach_print.html
+ and the example code)
+ <braunr> and symbol resolution may depend on where it's done from
+ <braunr> nlightnfotis: first, it only works with -dbg kernels
+ <braunr> so make sure you're running one
+ <braunr> actually, i'll write you a patch
+ <braunr> including a mach_printf function with argument parsing
+ <nlightnfotis> isn't it on by default? I read that in the document where
+ you discuss mach_printf
+ <nlightnfotis> ahh ok
+ <braunr> it's on by default on -dbg kernels
+ <braunr> i'll make a repository on darnassus too
+ <braunr> better store it there
+ <braunr> nlightnfotis:
+ http://darnassus.sceen.net/gitweb/rbraun/mach_print.git/
+ <braunr> nlightnfotis: i suggest you implement mach_print with inline asm
+ statement in a C file, so that you don't need to alter the build system
+ configuration
+ <braunr> i'll make an example of that too
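+
+The mach_printf mentioned above could look roughly like this: a sketch
+only, not the code from the repository.  It assumes a mach_print entry
+point taking a const char * (for instance the inline-assembly version
+just mentioned) and formats into a stack buffer to avoid dynamic
+allocation.
+
+    #include <stdarg.h>
+    #include <stdio.h>
+
+    extern void mach_print (const char *s);   /* asm stub, provided elsewhere */
+
+    static void
+    mach_printf (const char *fmt, ...)
+    {
+      char buf[256];   /* on the stack, no malloc involved */
+      va_list ap;
+
+      va_start (ap, fmt);
+      vsnprintf (buf, sizeof (buf), fmt, ap);
+      va_end (ap);
+
+      mach_print (buf);
+    }
+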
+ <nlightnfotis> braunr: that wasn't a problem. My only real problem atm is
+ that __atomic_t isn't recognised as a type, and I can not find the header
+ file for it on Hurd
+ <nlightnfotis> it was pt-internal.h in libpthread
+ <braunr> ah
+ <braunr> nlightnfotis: just in case, i updated the repository with an
+ inline assembly version
+ <braunr> let's see about __atomic_t
+ <braunr> sysdeps/i386/bits/pt-atomic.h:typedef __volatile int __atomic_t;
+ <braunr> nlightnfotis: just redeclare it as this locally
+ <braunr> nlightnfotis: ok ?
+ <nlightnfotis> I am working on it, because I still haven't found what
+ __atomic_t is typedefed from. Thinking of typedefing an int to it and see
+ how it goes
+ <nlightnfotis> braunr: found it just now: __volatile int
+ <braunr> "just now" ?
+ <braunr> 14:19 < braunr> sysdeps/i386/bits/pt-atomic.h:typedef __volatile
+ int __atomic_t;
+ <nlightnfotis> I was using cscope all this time
+ <braunr> why use cscope at all when i tell you where it is ?
+ <nlightnfotis> because I didn't notice it: your discussion was between
+ pino's and srs' and I wasn't tagged and thought it had something to do
+ with their discussion
+ <pinotree> (sorry)
+ <nlightnfotis> no it was my bad
+ <braunr> ok
+ <braunr> pinotree: there is indeed a special call to
+ __pthread_create_internal for the main thread
+ <pinotree> yeah
+ <pinotree> braunr: if there weren't that libc→pthread bridge, things
+ like pthread_self() or so wouldn't work for the main thread
+ <braunr> pinotree: right
+ <pinotree> braunr: weird thing is that the error you got is usually a sign
+ that pthread is not linked in explicitly
+ <braunr> pinotree: yes
+ <braunr> pinotree: with firefox, gdb can't locate pthread symbols before a
+ call to a pthread function
+ <braunr> so yes, libpthread is loaded after main is called
+ <braunr> nlightnfotis: can you give me a quick procedure to build gcc with
+ go support from your repository, and then test a go program please ?
+ <braunr> so i can have a better look at it myself
+ <nlightnfotis> braunr: sure you want access to my go repo? If you already
+ have gcc repo add my github repo as a remote and checkout
+ fotisk/goruntime_hurd
+ <braunr> i have your github repo
+ <nlightnfotis> git checkout fotisk/goruntime_hurd (You may need to revert a
+ commit or two, because of my latest endeavour with mach_print)
+ <nlightnfotis> braunr: check it out now, I reverted some messy commits for
+ you to rebuild
+ <braunr> nlightnfotis: i won't work on it right now, i'm building glibc to
+ check some things in libpthread
+ <braunr> since it seems to be the source of your problems and many others
+ <nlightnfotis> oh ok then. btw, it compiles ok, but when I try to compile
+ another program with gccgo collect2 cries about undefined references to
+ __pthread_num_threads and __pthread_total
+ <braunr> Oo
+ <braunr> another program ?
+ <nlightnfotis> braunr: will I get the same result if I slowly go through it
+ with gdb
+ <nlightnfotis> yep
+ <braunr> i don't understand
+ <braunr> what compiles ok, what fails ?
+ <nlightnfotis> gccgo compiles without errors (which is strange) but when I
+ use it to compile goroutine.go it fails with the errors I reported
+ <pinotree> (missing linking to pthread?)
+ <braunr> since when ?
+ <nlightnfotis> pinotree: perhaps braunr: since I made the changes with
+ mach_print
+ <nlightnfotis> pinotree: but what could be missing from the link? GCC
+ compiled programs get linked automatically to the shared objects of the
+ headers they include, right?
+ <nlightnfotis> (assuming it's not a huge program, only a tiny 10 liner for
+ instance)
+ <braunr> uh
+ <braunr> did you declare them as extern
+ <braunr> ?
+ <nlightnfotis> yes
+ <braunr> do you see -lpthread on the link line ?
+ <nlightnfotis> during gcc's compilation? I will have to rerun it again and
+ see.
+ <braunr> log the compilation output somewhere once
+ <braunr> nlightnfotis: why did you remove volatile from the definition of
+ __atomic_t ??
+ <nlightnfotis> just for testing purposes, because I thought that the GNU
+ version is volatile with no __ in front of it and that might cause some
+ issues.
+ <braunr> i don't understand
+ <nlightnfotis> it was just an experiment gone wrong
+ <braunr> nlightnfotis: keep volatile there
+ <nlightnfotis> just did
+ <nlightnfotis> braunr: there is -lpthread on some lines. For instance when
+ libtool is invoked.
+ <youpi> braunr: the pthread assertion usually happens when libpthread gets
+ loaded from a plugin, I guess mozilla got rid of libpthread in the main
+ application recently, simply
+ <pinotree> youpi: he said that the LD_PRELOAD trick (which used to
+ workaround the issue in older iceweasel) does not work, though
+ <youpi> ah? it does work for me
+ <pinotree> dunno then...
+ <braunr> youpi: aouch, ok
+ <braunr> nlightnfotis: what about the specific gcc invocation that fails ?
+ <braunr> pinotree: /lib/i386-gnu/libpthread.so.0: ERROR: cannot open
+ `/lib/i386-gnu/libpthread.so.0' (No such file or directory)
+ <braunr> trying with a working path this time
+ <braunr> better
+ <pinotree> sorry, i typed it by hand :p
+ <braunr> Segmentation fault
+ <braunr> but no assertion
+ <nlightnfotis> braunr: gccgo hello.go
+ <braunr> nlightnfotis: ?
+ <pinotree> <braunr> nlightnfotis: what about the specific gcc invocation
+ that fails ?
+ <braunr> nlightnfotis: i'm asking if -lpthread is present when you have
+ these undefined reference errors
+ <nlightnfotis> it is. it seems so
+ <nlightnfotis> I wrote above that it is present when libtool is called
+ <nlightnfotis> I don't know what libtool is doing sadly
+ <braunr> you said some lines
+ <nlightnfotis> but from what I've seen I believe it does some kind of
+ linking
+ <braunr> paste it somewhere please
+ <nlightnfotis> yeah it doesn't fail though
+ <braunr> that's far too vague ...
+ <braunr> it doesn't fail ?
+ <nlightnfotis> give me a second
+ <braunr> i thought it did
+ <nlightnfotis> no it doesn't
+ <braunr> 14:53 < nlightnfotis> gccgo compiles without errors (which is
+ strange) but when I use it to compile goroutine.go it fails with the
+ errors I reported
+ <nlightnfotis> yeah gccgo compiles.
+ <nlightnfotis> when I use the compiler, it fails
+ <braunr> so it fails running
+ <braunr> is gccgo built with -lpthread itself ?
+ <nlightnfotis> http://pastebin.com/1TkFrDcG
+ <nlightnfotis> check it out
+ <nlightnfotis> I think it does, but I would take an extra opinion
+ <nlightnfotis> line 782
+ <nlightnfotis> and 784
+ <braunr> (are you building as root ?)
+ <nlightnfotis> yes. for now
+ <pinotree> baaad :p
+ <nlightnfotis> I never had any particular problems...except that one time
+ that I rm -rf the source tree :P
+ <nlightnfotis> I know it's bad d/w
+ <nlightnfotis> braunr: I found something interesting (I don't know if it's
+ expected or not; probably not): If I set GOMAXPROCS to 2, and run the
+ goroutine program, it seems to be running for a while (with the
+ goroutines!) and then it segfaults. Will look more into it
+ <braunr> it's interesting, yes
+ <braunr> nlightnfotis: have you tried the preload trick too ?
+ <nlightnfotis> ldpreload? no. Could you tell me how to do it? export
+ LDPRELOAD and a path to libpthread?
+ <braunr> nlightnfotis: LD_PRELOAD=/lib/i386-gnu/libpthread.so.0.3 ...
+ <nlightnfotis> braunr: it also produces a very different backtrace. This
+ one heavily involves mig functions
+ <tschwinge> braunr, nlightnfotis: Thanks for working together, and sorry
+ for my lack of time.
+ <braunr> nlightnfotis: paste please
+ <nlightnfotis> tschwinge, Hello. It's ok, I am sorry for not showing good
+ amounts of progress from my part.
+ <nlightnfotis> braunr: http://pastebin.com/J4q2NN9p
+ <braunr> nlightnfotis: thread apply all bt full please
+ <nlightnfotis> braunr: http://pastebin.com/tbRkNzjw
+ <braunr> looks like an infinite loop of
+ __mach_port_mod_refs/__mig_dealloc_reply_port
+ <braunr> ...
+ <nlightnfotis> yes that's what I got from it too. Keep in mind these
+ results are with GOMAXPROCS=2 and they result in segmentation fault
+ <nlightnfotis> and I also can not understand the corrupted stack at the
+ beginning of the backtrace
+ <braunr> no please
+ <nlightnfotis> ?
+ <braunr> test LD_PRELOAD=/lib/i386-gnu/libpthread.so.0.3 without
+ GOMAXPROCS=2
+ <nlightnfotis> braunr: LD_PRELOAD without GOMAXPROCS results in the usual
+ assertion failure and abortion of execution after it
+ <braunr> nlightnfotis: ok
+ <braunr> nlightnfotis: im sorry, i thought you couldn't launch a test since
+ you added mach_print
+ <nlightnfotis> I am not using mach_print, I couldn't fix the issue with the
+ references and thought I was losing time, so I went back to debugging
+ with gdb until I can't get anything more out of it
+ <nlightnfotis> braunr: should I focus on mach_print? Will it produce very
+ different results than gdb?
+ <nlightnfotis> (btw I didn't delete mach print or anything, it's still
+ there, in another branch)
+ <nlightnfotis> braunr: Now I stepped through the program in gdb, and got
+ something really really weird. Something close to a full execution
+ <nlightnfotis> Number of goroutines and machine threads according to the
+ runtime was 3, __pthread_num_threads was 4
+ <nlightnfotis> it did get SIGILL (illegal instruction) sometimes though
+ <nlightnfotis> and it exited with code 02
+ <braunr> uh
+ <braunr> nlightnfotis: try with mach_print yes, it will show the values
+ from the real execution context, and be as close as what we can get
+ <braunr> i'm not sure about how gdb finds the values
+ <nlightnfotis> braunr: ok, will spend the rest of the day to find a way to
+ make mach_print and the other values work. Did you see my last messages,
+ with the goroutines that worked under gdb?
+ <braunr> yes
+ <nlightnfotis> it seemed to run. Didn't get the expected output, but also
+ didn't get any errors other than illegal instruction either
+ <nlightnfotis> braunr: I still have not found an easy way to do what you
+ asked me to from go's runtime. Would it be ok if I do it from inside
+ libpthread?
+ <braunr> nlightnfotis: do what ?
+ <nlightnfotis> print the values of __pthread_total and
+ __pthread_num_threads with mach_print.
+ <braunr> how ?
+ <braunr> oh wait
+ <braunr> well yes ofc, they're not exported :/
+ <braunr> nlightnfotis: have you been able to use mach_print ?
+ <nlightnfotis> braunr: not really, because of the problems I shared
+ earlier. I can try to use it with in-gcc structures if you want me to,
+ it's nothing hard to do
+ <nlightnfotis> actually I will. Hang on
+ <braunr> proceed with debugging inside libpthread instead
+ <braunr> using mach_print to avoid deadlocks this time
+ <braunr> (mach_print was purposely built for debugging such low level code
+ parts)
+ <nlightnfotis> ok, I will patch this, but can I build it tomorrow?
+ <braunr> yes
+ <braunr> just keep us informed
+ <nlightnfotis> ok, thanks, and sorry for everything I have done. I want you
+ to know that I really appreciate that you are helping me.
+ <braunr> remember: the goal here is to understand why __pthread_total and
+ __pthread_num_threads have inconsistent values
+ <nlightnfotis> braunr: whenever you see it, mach_print works as expected
+ inside gcc.
+
+
+# IRC, freenode, #hurd, 2013-09-03
+
+ <nlightnfotis> braunr: I have made the changes I want to glibc. After I
+ build it, how do I install it? make install or is it more involved?
+ <braunr> nlightnfotis: use LD_LIBRARY_PATH
+ <braunr> never install an experimental glibc unless you have backups or are
+ certain of what you're doing
+ <braunr> nlightnfotis: i didn't understand what you meant about mach_print
+ yesterday
+ <nlightnfotis> it works in gcc.
+ <braunr> what do you mean "in gcc" ?
+ <braunr> why would you put mach_print in gcc ?
+ <braunr> we want it in go programs ..
+ <nlightnfotis> yes, I understand it. gcc was the fastest way to test its
+ usage at that moment (for me) and I just wanted to confirm it works. I
+ only had to change its signature to const char * because gcc wouldn't
+ accept it otherwise
+ <braunr> doesn't my example include const ?
+ <braunr> nlightnfotis: why did you rebuild glibc ?
+ <nlightnfotis> braunr: I have not started yet, will do now, to apply the
+ changes to libpthread
+ <braunr> you mean add the print calls there ?
+ <nlightnfotis> yes
+ <braunr> ok
+ <braunr> use debian/rules build, interrupt when you see gcc invocations
+ <braunr> then switch to the build directory (hurd-libc-i386 iirc), and make
+ others
+ <braunr> nlightnfotis: did you send me the instructions to build and test
+ your work ?
+ <braunr> so i can reproduce these weird threading problems at my side
+ <nlightnfotis> braunr: sorry, I was in the toilet, where would you like me
+ to send the instructions?
+ <braunr> nlightnfotis: i should be fine i guess, let's check here
+ <braunr> nlightnfotis: i simply used configure
+ --enable-languages=c,c++,go,lto
+ <braunr> and i'll see how it goes
+ <nlightnfotis> I configure with --enable-languages=go (it automatically
+ builds c and c++ for that as go depends on them), --disable-bootstrap,
+ and use a custom prefix to install at a custom location
+ <braunr> yes
+ <braunr> ok
+ <braunr> nlightnfotis: how long does it take you ?
+ <nlightnfotis> complete non-bootstrap build about 45 minutes. With a build
+ tree ready and only simple changes, about 2-3 minutes
+ <nlightnfotis> braunr: In an hour I will go offline for 2-3 hours, I am
+ gonna move back to my other home in the other city. It won't take long,
+ the whole process will be about 4 hours, and I will compensate for the
+ time lost by staying up late up until 3 o clock in the morning
+ <braunr> i'd prefer you didn't "compensate"
+ <nlightnfotis> ?
+ <braunr> work if you want to
+ <braunr> no one is forcing you to work late at night for gsoc, unless you
+ want to
+ <nlightnfotis> no, I do it because I want to. I **really** really want to
+ succeed, and time is of the essence for me at this point
+ <braunr> then ok
+ <braunr> nlok i have a gccgo compiler
+ <pinotree> nlok?
+ <braunr> nl being nlightnfotis but he's gone
+ <pinotree> oh
+ * pinotree was trying to parse that as "now" or "look" or the like
+ <nlightnfotis> braunr: 08:19:56< braunr> use debian/rules build, interrupt
+ when you see gcc invocations: Are gcc invocations related to
+ i486-gnu-gcc-4.7?
+ <nlightnfotis> nvm I'm good now :)
+ <gnu_srs> of course not, that's only for compiling applications using the
+ newly built libc
+ <nlightnfotis> gnu_srs: I didn't exactly understand what you said? Care to
+ elaborate? which one is for compiling applications using the newly built
+ libc? i486-gnu-gcc-4.7?
+ <gnu_srs> when you see gcc ... -llibc.so you know libc.so is built, and
+ that is sufficient to use it.
+ <gnu_srs> with LD_PRELOAD or LD_LIBRARY_PATH (after cding and building
+ others)
+ <nlightnfotis> gnu_srs: thanks for the tip :)
+ <gnu_srs> :-D
+ <nlightnfotis> is anyone else getting glibc build problems? (from apt-get
+ source glibc, at cxa-finalize.c)?
+ <gnu_srs> apt-get source eglibc; apt-get build-dep eglibc (as root);
+ dpkg-buildpackage -b ...
+ <braunr> nlightnfotis: just debian/rules build
+ <braunr> to start the glibc build
+ <nlightnfotis> braunr: oh I have now, it's building without issues so far
+ <braunr> when you see gcc processes, it means the build process has
+ switched from configuring to making
+ <braunr> then interrupt (ctrl-c)
+ <braunr> cd build-tree/hurd-i386-libc
+ <braunr> make others
+ <braunr> or make lib others
+ <braunr> lib is glibc, others is some addons which include our libpthread
+ <nlightnfotis> thanks for the tip braunr.
+ <nlightnfotis> braunr: I have managed to get a working version of glibc and
+ libpthread with mach_print working. I have also run 2 test programs and
+ it works as expected. Will continue researching tomorrow if that's ok
+ with you, I am too tired to keep on now.
+ <nlightnfotis> for the record compilation of glibc right from the start was
+ about 1 hour and 20 - 30 minutes
+
+
+# IRC, freenode, #hurd, 2013-09-04
+
+ <braunr> i've taken a deeper look at this assertion failure
+ <braunr> and ...
+ <braunr> it has nothing to do with pthread_create
+ <braunr> i assumed it was the one in sysdeps/mach/pt-thread-start.c
+ <nlightnfotis> pthread_self ()?
+ <braunr> but it's actually from sysdeps/mach/hurd/pt-sysdep.h, in
+ _pthread_self()
+ <braunr> and looking there :
+ <braunr> thread = *(struct __pthread **)__hurd_threadvar_location
+ (_HURD_THREADVAR_THREAD);
+ <braunr> so simply put, context switching doesn't fix up thread specific
+ data ...
+ <braunr> it's that simple
+ <nlightnfotis> wow
+ <nlightnfotis> today I was running programs all day long with mach_print on
+ to print __pthread_total and __pthread_num_threads to see when both
+ become 1 and couldn't find anything
+ <nlightnfotis> I was nearly desperate. You just made my day! :)
+ <braunr> now the problem is
+ <braunr> thread specific data is highly dependent on the stack
+ <braunr> it's illegal to make a thread switch stack and expect it to keep
+ working on the hurd
+ <nlightnfotis> unless split stack is activated?
+ <nlightnfotis> no wait
+ <braunr> split stack is completely unsupported on the hurd
+ <teythoon> uh, why would that be?
+ <braunr> teythoon: about split stack ?
+ <teythoon> yes
+ <braunr> i'm not sure
+ <nlightnfotis> at least now we do know what the problem is and I can start
+ working on a solution.
+ <nlightnfotis> braunr: we should tell tschwinge and youpi about it.
+ <braunr> nlightnfotis: sure but
+ <braunr> nlightnfotis: you can also start looking at a workaround
+ <braunr> nlightnfotis: also, let's make sure that's the reason first
+ <braunr> nlightnfotis: use mach_print to display the stack pointer when
+ switching
+ <braunr> nlightnfotis:
+ http://stackoverflow.com/questions/1880262/go-forcing-goroutines-into-the-same-thread
+ <braunr> " I believe runtime.LockOSThread() is necessary if you are
+ creating a library binding from C code which uses thread-local storage"
+ <braunr> oh, a paper about the go runtime scheduler
+ <braunr> let's have a look ..
+ <teythoon> braunr: have you seen the high level overview presented in that
+ blog post I once posted here?
+ <braunr> no
+ <nlightnfotis> braunr, just came back, and read the log. Which paper are
+ you reading? The one from columbia university?
+ <braunr> but i need to know about details here, specifically, if threads do
+ change stack
+ <braunr> nlightnfotis: yes
+ <teythoon> braunr: ok
+ <braunr> this could be caused either by true stack switching, or by "stack
+ segmentation" as implemented by go
+ <braunr> it is interesting that there are stack related members per
+ goroutine
+ <braunr> nlightnfotis: in particular, pthread_attr_setstacksize() doesn't
+ work on the hurd
+ <nlightnfotis> <braunr> it is interesting that there are stack related
+ members per goroutine -> I think that's go's policy. All goroutines run
+ on a shared address space (that is the kernel thread's address space)
+ <braunr> nlightnfotis: that's obvious
+ <braunr> and not the problem
+ <braunr> and yes, it's "stack segmentation"
+ <braunr> and on linux, and probably other archs, switching stack may be
+ perfectly legit
+ <braunr> on the hurd, we still have threadvars
+ <braunr> which are the hurd specific thread local storage mechanism
+ <braunr> it means 1/ all stacks in a process must have the same size
+ <braunr> 2/ stack size must be a power of two
+ <braunr> 3/ threads can't switch stack
+ <braunr> this hardly prevents goroutines from being run by just any thread
+ <braunr> i see there are already hurd specific changes about stack
+ handling
+ <nlightnfotis> so we should only make changes to the specific gccgo
+ scheduler as a workaround under the Hurd right?
+ <braunr> i don't know
+ <braunr> this might also push the switch to tls
+ <nlightnfotis> this sounds better as a long term fix
+ <nlightnfotis> but it must also involve a great amount of work, right?
+ <braunr> most of it has already been done
+ <braunr> by youpi and tschwinge
+ <nlightnfotis> with the changes to tls early in the summer?
+ <braunr> maybe
+ <braunr> 14:36 < braunr> nlightnfotis: also, let's make sure that's the
+ reason first
+ <braunr> 14:36 < braunr> nlightnfotis: use mach_print to display the stack
+ pointer when switching
+ <braunr> check what goes wrong with the stack
+ <braunr> then we'll see
+ <braunr> as a very simple workaround, i expect locking g's on m's to be a
+ good first step
+ <nlightnfotis> braunr: noted everything. that's my work for tonight. I
+ expect myself to stay up late like yesterday and have this all figured
+ out by tomorrow.
+ <braunr> nlightnfotis: why not now ?
+ <nlightnfotis> I am starting from now, but I expect myself to stop about 6
+ o clock here (2 hours) because I have an appointment with a doctor.
+ <nlightnfotis> and keep on when I come back home
+ <braunr> well adding a few printfs to track the stack should be doable
+ before 2 hours
+ <nlightnfotis> braunr: I am doing it now. Will report as soon as I have
+ results :)
+ <nlightnfotis> braunr: have I messed up with the way I read esp's value?
+ https://github.com/NlightNFotis/glibc/commit/fdab1f5d45a43db5c5c288c4579b3d8251ee0f64#L1R67
+ <braunr> nlightnfotis: +unsigned
+ <braunr> nlightnfotis: using gdb :
+ <braunr> (gdb) info registers
+ <braunr> esp 0x203ff7c0 0x203ff7c0
+ <braunr> (gdb) print thread->stackaddr
+ <braunr> $2 = (void *) 0x2000000
+ <nlightnfotis> oh yes, I know about gdb, I thought you wanted me to use
+ mach_print
+ <braunr> nlightnfotis: yes
+ <braunr> this is just my own attempt
+ <braunr> and it does show the stack pointer is completely outside the
+ thread stack
+ <braunr> nlightnfotis: in your code, i suggest using
+ __builtin_frame_address()
+ <braunr> well __builtin_frame_address(0)
+ <braunr> see
+ http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/Return-Address.html#Return-Address
+ <braunr> it's not exactly the stack pointer but close enough, unless of
+ course the stack is changed in the middle of the function
+ <nlightnfotis> I see. I am gonna try one more time with esp the way I
+ worked it and if it fails to work, I am gonna use return address
+ <braunr> nlightnfotis: be very careful about signed/unsigned and type
+ widths
+ <braunr> not return address, frame address
+ <braunr> return address is code, frame address is data (stack)
+ <nlightnfotis> ah, I see, thanks for the correction.
+ <braunr> youpi: not sure you caught it earlier, but the problem fotis has
+ been having with goroutines is about threadvars
+ <braunr> simply put, threads use setcontext functions to save/restore
+ goroutine state, which makes them switch stacks, rendering the location of
+ threadvars invalid and making _pthread_self() choke
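+
+A small sketch of the check suggested above: report
+__builtin_frame_address(0) (close enough to the stack pointer) together
+with the thread's recorded stack bounds, which makes it obvious when a
+context switch has left the stack pointer outside the initial stack.
+The structure is only an illustrative stand-in for the real libpthread
+thread descriptor, and in libpthread itself the output would go through
+mach_print rather than printf.
+
+    #include <stdio.h>
+
+    struct thread_info          /* stand-in for the thread descriptor */
+    {
+      void *stackaddr;
+      unsigned long stacksize;
+    };
+
+    static void
+    report_stack_position (const struct thread_info *t)
+    {
+      void *fp = __builtin_frame_address (0);
+
+      printf ("frame %p, stack [%p, %p)\n", fp, t->stackaddr,
+              (void *) ((char *) t->stackaddr + t->stacksize));
+    }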
+
+
+# IRC, freenode, #hurd, 2013-09-05
+
+ <nlightnfotis> I am having very weird behavior with my code, something that
+ I can not explain and seems likely to be a bug, could someone else take a
+ look?
+ <nlightnfotis> pinotree are you available at the moment to take a look at
+ something?
+ <pinotree> nlightnfotis: dont ask to ask, just ask
+ <nlightnfotis> I have made some modifications to pthread_self as also
+ suggested by braunr to see if the stack pointer is within the bounds of
+ the frame address after context switching. I can get the values of both
+ esp and frame_address to be shown before the context switch, but I can
+ only get the value of esp to be shown after the context switch, and it
+ always results in the program getting killed
+ <nlightnfotis>
+ https://github.com/NlightNFotis/glibc/blob/7e72da09a42b1518865f6f4882d68689e681f25b/libpthread/sysdeps/mach/hurd/pt-sysdep.h#L97
+ <nlightnfotis> the thing is, a dummy print I have right after the code
+ that was supposed to print the frame_address after the context switch
+ executes without any issues.
+ <pinotree> oh assembler... cannot help, sorry :/
+ <nlightnfotis> oh no, I am not asking for assembler help, that part works
+ quite alright. I am asking why from the 4 identical pieces of code that
+ print debugging values the last one doesn't work. I am on it all day, and
+ still have not found an answer
+ <braunr> nlightnfotis: i can
+ <nlightnfotis> hello braunr,
+ <braunr> nlightnfotis: do you have a backtrace ?
+ <braunr> uh
+ <nlightnfotis> nope, it crashes right after I execute something. Let me
+ compile glibc once again and see if a fix I attempted works
+ <braunr> malloc and free use locks
+ <braunr> so they probably use _pthread_self
+ <braunr> don't use them
+ <braunr> for debugging, a simple statically allocated buffer on the stack
+ will do
+ <braunr> nlightnfotis: so ?
+ <nlightnfotis> I got past my original problem, but now I am trying to get
+ past the sigkills that kill the program at the beginning
+ <nlightnfotis> i remember not having this problem, so I am compiling my
+ master branch to see if it is reproducible. If it is, it means something
+ is very wrong. If it's not, it means I screwed up somewhere
+ <braunr> i don't understand, how do you know if you get past the problem if
+ you still have trouble reaching that code ?
+ <nlightnfotis> braunr: I fixed all my problems now. I can see that both esp
+ and the frame_address are the same after context switching though?
+ <braunr> always ?
+ <braunr> for all goroutines ?
+ <nlightnfotis> for all kernel threads, not go routines. We are in
+ libpthread
+ <braunr> if they're the same after a context switch, it usually means the
+ scheduler didn't switch
+ <braunr> well obviously
+ <braunr> but what i asked you was to trace calls to setcontext functions
+ <nlightnfotis> I will run some tests again. May I show you my code to see
+ if there is anything wrong with it?
+ <braunr> what address do you have ?
+ <braunr> not yet
+ <braunr> i'm not sure you understand what i want to check
+ <braunr> do you see how threadvars work basically ?
+ <nlightnfotis> I think so yes, they keep in the stack the local variables
+ of a thread right?
+ <nlightnfotis> and the globals
+ <nlightnfotis> or
+ <nlightnfotis> wait a minute...
+ <braunr> yes but do you see how the thread specific data are fetched ?
+ <nlightnfotis> with __hurd_threadvar_location_from_sp?
+ <braunr> yes but "basically", what does it do ?
+ <nlightnfotis> it gets a stack pointer as a parameter, and returns the
+ location of that specific data based on that stack pointer, right?
+ <braunr> and how ?
+ <nlightnfotis> I believe it must compare the base value of the stack and
+ the value of the end of the stack, and if the results are consistent, it
+ returns a pointer to the data?
+ <braunr> and how does it determine the start and end of the stack ?
+ <nlightnfotis> stack_pointer must be pointing at the base of the
+ stack. That + stack_size must be the stack limit I guess.
+ <braunr> so you're saying the caller of __hurd_threadvar_location_from_sp
+ knows the stack base ?
+ <nlightnfotis> I am not so sure I understand this question.
+ <braunr> i want to know if you understand how threadvars work
+ <braunr> apparently you don't
+ <braunr> the caller only has its current stack pointer
+ <braunr> which does *not* point to the stack base
+ <braunr> threadvars work by assuming a *fixed* stack size, power of two,
+ aligned (obviously)
+ <braunr> in our case, 2MiB (except in hurd servers where a kludge reduces
+ that to 64k)
+ <braunr> this is why stack size can't be changed
+ <braunr> this is also why the stack pointer can't ever point outside the
+ initial stack
+ <braunr> i want you to make sure go violates this last assumption
+ <braunr> so 1/ show the initial stack boundaries of your threads, then show
+ that, after loading a goroutine, the stack pointer is outside
+ <braunr> which is what, if i'm right, triggers the assertion
+ <braunr> ask if there is anything confusing
+ <braunr> this is important, it should already have been done
+ <nlightnfotis> ok, I noted it all, I am starting to work on it right now. I
+ only have one question. My results, the ones with the stack pointer and
+ the frame address, are expected or unexpected?
+ <braunr> i don't know
+ <braunr> show me the code again please
+ <braunr> and explain your intent
+ <nlightnfotis>
+ https://github.com/NlightNFotis/glibc/blob/7fe202317db4c3947f8ae1d1a4e52f7f0642e9ed/libpthread/sysdeps/mach/hurd/pt-sysdep.h
+ <nlightnfotis> At first I print the value of esp and the frame_address
+ before the context switching and after the context switching.
+ <nlightnfotis> The different variables were introduced as part of a test to
+ see if my results were consistent,
+ <braunr> what context switch ?
+ <nlightnfotis> in hurd_threadvar_location
+ <braunr> what makes you think this is a context switch ?
+ <nlightnfotis> in threadvar.h, it calls __hurd_threadvar_location_from_sp.
+ <nlightnfotis> the full path for it is glibc/hurd/hurd/threadvar.h
+ <braunr> i don't see how giving me the path will explain why it's a context
+ switch
+ <braunr> and i can tell you right away it's not
+ <braunr> hurd_threadvar_location is basically a lookup returning the
+ address of the thread specific data
+ <nlightnfotis> wait a minute...does this mean that
+ hurd_threadvar_location_from_sp is also a lookup function for the same
+ reason
+ <nlightnfotis> ?
+ <braunr> yes
+ <braunr> isn't the name meaningful enough ?
+ <braunr> "location of the threadvars from stack pointer"
+ <nlightnfotis> I guess I made wrong deductions from when you originally
+ shared your findings...
+ <nlightnfotis> <braunr> thread = *(struct __pthread
+ **)__hurd_threadvar_location (_HURD_THREADVAR_THREAD);
+ <nlightnfotis> <braunr> so simply put, context switching doesn't fix up
+ thread specific data ...
+ <nlightnfotis> I thought that hurd_threadvar_location was doing the context
+ switching
+ <braunr> nlightnfotis: by context switching, i mean setcontext functions
+ <nlightnfotis> braunr: You mean the one in sysdeps/mach/hurd/i386?
+ <braunr> yes
+ <braunr> but
+ <braunr> do you understand what i want you to check now ?
+ <nlightnfotis> I think I got it this time. Let me explain it:
+ <nlightnfotis> You suggested that stack sizes are fixed. That is the main
+ reason that the stack pointer should not be able to point outside of it.
+ <braunr> no
+ <braunr> locating threadvars is done by applying a mask, computed from the
+ stack size, on the stack pointer, to determine its base
+ <nlightnfotis> yeah, what __hurd_threadvar_location_from_sp is doing
+ <braunr> if size is a power of two, size - 1 is a mask that, if
+ complemented, aligns the address
+ <braunr> yes
+ <braunr> so, threadvars expect the stack pointer to always point to the
+ initial stack
+ <nlightnfotis> and we wanna prove that go violates this rule right? That
+ the stack pointer is not pointing at the initial stack
+ <braunr> yes
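+
+The mask computation described above can be illustrated with a few
+lines of C.  This is a sketch for the discussion only: the 2 MiB size is
+the default mentioned above, and the function is not the actual glibc
+code.
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    #define STACK_SIZE (2UL * 1024 * 1024)   /* fixed, power of two */
+
+    /* With a fixed, power-of-two, aligned stack size, the base of the
+       containing stack (from which the threadvars are found at a fixed
+       offset) can be recovered from any stack pointer inside the
+       initial stack by masking off the low bits.  */
+    static uintptr_t
+    stack_base_from_sp (uintptr_t sp)
+    {
+      return sp & ~(STACK_SIZE - 1);
+    }
+
+    int
+    main (void)
+    {
+      int local;
+      uintptr_t sp = (uintptr_t) &local;
+
+      printf ("sp %#lx -> assumed stack base %#lx\n",
+              (unsigned long) sp, (unsigned long) stack_base_from_sp (sp));
+      return 0;
+    }
+
+If the stack pointer has been switched to a goroutine's segmented
+stack, the masked value no longer points into the pthread stack, and
+the threadvar lookup (and thus _pthread_self) returns garbage.
+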
diff --git a/community/gsoc/project_ideas/download_backends.mdwn b/community/gsoc/project_ideas/download_backends.mdwn
index f794e814..c0bdc5b2 100644
--- a/community/gsoc/project_ideas/download_backends.mdwn
+++ b/community/gsoc/project_ideas/download_backends.mdwn
@@ -1,12 +1,12 @@
-[[!meta copyright="Copyright © 2009 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2009, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation License, Version 1.2 or
any later version published by the Free Software Foundation; with no Invariant
Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
-is included in the section entitled
-[[GNU Free Documentation License|/fdl]]."]]"""]]
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
[[!meta title="Use Internet Protocol Translators (ftpfs etc.) as Backends for Other Programs"]]
@@ -19,8 +19,9 @@ Download protocols like FTP, HTTP, BitTorrent etc. are very good candidates for
this kind of modularization: a program could simply use the download
functionality by accessing FTP, HTTP etc. translators.
-There is already an ftpfs translator in the Hurd tree, as well as an [httpfs
-translator on hurdextras](http://www.nongnu.org/hurdextras/#httpfs); however,
+There is already an [[hurd/translator/ftpfs]] translator in the Hurd tree, as
+well as an [[hurd/translator/httpfs]] on
+[hurdextras](http://www.nongnu.org/hurdextras/); however,
these are only suitable for very simple use cases: they just provide the actual
file contents downloaded from the URL, but no additional status information
that are necessary for interactive use. (Progress indication, error codes, HTTP
diff --git a/community/gsoc/project_ideas/mtab/discussion.mdwn b/community/gsoc/project_ideas/mtab/discussion.mdwn
new file mode 100644
index 00000000..716fb492
--- /dev/null
+++ b/community/gsoc/project_ideas/mtab/discussion.mdwn
@@ -0,0 +1,2072 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!tag open_issue_hurd]]
+
+# IRC, freenode, #hurd, 2013-04-17
+
+ <kuldeepdhaka> thinking how to get the listing. traversing would be
+ inefficient, trying to come up with something better
+ <braunr> what listing ?
+ <braunr> and traversing what ?
+ <kuldeepdhaka> mtab
+ <braunr> well i assumed so
+ <braunr> be more precise please
+ <kuldeepdhaka> when the translator is done initializing, <translation
+ info> is written to /etc/mtab. <translation info> will be provided
+ by the translator, and when someone wants to read the info they just read
+ it. this way, if there are credentials like an ftp site's username and
+ password, they can be masked by the translator
+ <kuldeepdhaka> if some translators don't want to be listed, no need to
+ write to the file | while unmounting (sorry i couldn't find the right
+ word), it will pass the mount node address | <translation info> will have
+ a special structure to remove/add mounts, example "a /mount-to
+ /mount-from" = add, "r /mount-to" = remove, here "/mount-to" will be
+ unique for every mount
+ <kuldeepdhaka> this has a drawback, we would have to trust translators for
+ the listed data | also "/mount-to" + "/mount-from" could be used as a
+ combination for making sure that other translators are unable to remove
+ other translators' mount data
+ <kuldeepdhaka> sorry, but "also "/mount-to" + "/mount-from" could be used
+ as a combination for making sure that other translators are unable to
+ remove other translators' mount data" is a bad idea if we had to print
+ the whole thing
+ <kuldeepdhaka> braunr, what's your opinion?
+ <pinotree> you don't need a mtab to "unmount" things on hurd
+ <braunr> kuldeepdhaka: hum, have you read the project idea ?
+ <braunr>
+ http://darnassus.sceen.net/~hurd-web/community/gsoc/project_ideas/mtab/
+ <braunr> A more promising approach is to have mtab exported by a special
+ translator, which gathers the necessary information on demand. This could
+ work by traversing the tree of translators, asking each one for mount
+ points attached to it.
+ <kuldeepdhaka> pinotree, not to unmount, i mean to remove the
+ <translation data>
+ <braunr> for a first implementation, i'd suggest a recursive traversal of
+ root-owned translators
+ <kuldeepdhaka> braunr, hum, but it did state it was inefficient
+ <braunr> where ?
+ <kuldeepdhaka> para 5 , line 3
+ <kuldeepdhaka> and line 6
+ <braunr> no
+ <braunr> traversing "all" nodes would be inefficient
+ <braunr> translators which host the nodes of other translators could
+ maintain a simple list of active translators
+ <braunr> ext2fs, e.g. (if that's not already the case) could keep the list
+ of the translators it started
+ <braunr> we can already see that list with pstree for example
+ <braunr> but this new list would only retain those relevant for mtab
+ <braunr> i.e. root-owned ones
+ <pinotree> i would not limit to those though
+ <braunr> and then filter on their type (e.g. file system ones)
+ <braunr> pinotree: why ?
+ <pinotree> this way you could have proper per-user /proc/$pid/mounts info
+ <braunr> we could also very easily have a denial of service
+ <kuldeepdhaka> but how will the mount point and source point be
+ listed?
+ <braunr> they're returned by the translator
+ <kuldeepdhaka> k
+ <braunr> you ask /, it returns its store and its options, and asks its
+ children recursively
+ <braunr> a /home translator would return its store and its options
+ <braunr> etc..
+ <braunr> each translator would build the complete path before returning it
+ <braunr> sort of, it's very basic
+ <braunr> but that would be a very hurdish way to do it
+ <kuldeepdhaka> should /etc/mtab be made seekable, and what should the
+ filesize be? content is generated on demand, so it could cause problems
+ (fsize: 0, seekable: no), your opinions?
+ <braunr> kuldeepdhaka: it should have all the properties of a regular file
+ <braunr> the filesize would be determined after it's generated
+ <braunr> being empty doesn't imply it's not seekable
+ <kuldeepdhaka> content is generated on demand, so it could cause problems
+ with seeking and the filesize, shall i still implement it as a regular
+ file?
+ <kuldeepdhaka> in two different reads, it could generate different
+ content, even though the same seek pos is used...
+ <braunr> what ?
+ <braunr> the content is generated on open
+ <kuldeepdhaka> ooh, ok
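+
+Regarding generating the content on open: a minimal illustration of the
+idea (not actual translator code; the names are made up) is to build the
+text once at open time, so the file size is known and reads and seeks
+operate on a stable snapshot even though the underlying data may change
+between two opens.
+
+    #include <stdlib.h>
+    #include <string.h>
+
+    struct open_mtab
+    {
+      char *contents;
+      size_t size;
+    };
+
+    static char *
+    collect_mtab_text (void)
+    {
+      /* Stand-in for the real collection of translator information.  */
+      return strdup ("/dev/hd0s1 / ext2fs rw 0 0\n");
+    }
+
+    static int
+    mtab_open (struct open_mtab *o)
+    {
+      o->contents = collect_mtab_text ();
+      if (o->contents == NULL)
+        return -1;
+      o->size = strlen (o->contents);   /* size fixed at open time */
+      return 0;
+    }
+
+    static size_t
+    mtab_read (struct open_mtab *o, char *buf, size_t len, size_t offset)
+    {
+      if (offset >= o->size)
+        return 0;                       /* EOF */
+      if (len > o->size - offset)
+        len = o->size - offset;
+      memcpy (buf, o->contents + offset, len);
+      return len;
+    }
+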
+
+
+# IRC, freenode, #hurd, 2013-06-04
+
+ <safinaskar> how to see list of all connected translators?
+ <braunr> you can't directly
+ <braunr> you can use ps to list processes and guess which are translators
+ <braunr> (e.g. everything starting with /hurd/)
+ <braunr> a recursive call to obtain such a list would be useful
+ <braunr> similar to what's needed to implement /proc/mounts
+
+
+# IRC, freenode, #hurd, 2013-06-25
+
+In context of [[open_issues/mig_portable_rpc_declarations]].
+
+ <teythoon> should I go for an iterator like interface instead?
+ <teythoon> btw, what's the expected roundtrip time?
+ <braunr> don't think that way
+ <braunr> consider the round trip delay as varying
+ <teythoon> y, is it that bad?
+ <braunr> no
+ <braunr> but the less there is the better
+ <braunr> we think the same with system calls even if they're faster
+ <braunr> the delay itself isn't the real issue
+ <braunr> look at how proc provides information
+ <braunr> (in procfs for example)
+
+
+## IRC, freenode, #hurd, 2013-06-26
+
+ <teythoon> so tell me about the more hurdish way of dealing with that issue
+ <teythoon> creating a specialized translator for this?
+ <braunr> 11:45 < pinotree> there's also
+ http://darnassus.sceen.net/~hurd-web/community/gsoc/project_ideas/mtab/
+ about that topic
+ <braunr> you need to avoid thinking with centralization in mind
+ <braunr> the hurd is a distributed system in practice
+ <braunr> i think proc is the only centralized component in there
+ <teythoon> braunr: would having an mtab translator and having fs
+ translators register to that be acceptable?
+ <braunr> teythoon: why do you want to centralize it ?
+ <braunr> translators already register themselves when they get attached to
+ a node
+ <braunr> we don't want an additional registration
+ <braunr> have you read the link we gave you ?
+ <teythoon> I did and I got the message, but isn't the concept of
+ /proc/mounts or a mtab file already a centralized one?
+ <braunr> that doesn't mean the implementation has to be
+ <braunr> and no, i don't think it's centralized actually
+ <braunr> it's just a file
+ <braunr> you can build a file from many sources
+ <teythoon> or if we do it your way, recursing on fs translators *but*
+ restricting this to root owned translators, isn't that also suffering
+ from centralization wrt the root user? I mean the concept of all mounted
+ filesystems does not apply cleanly to the hurd
+ <braunr> i don't understand
+ <braunr> restricting to the root user doesn't mean it's centralized
+ <braunr> trust has nothing to do with being centralized
+ <teythoon> I guess I'm not used to thinking this way
+ <braunr> teythoon: i guess that's the main reason why so few developers
+ work on the hurd
+ <teythoon> also the way fs notification is done is also centralized, that
+ could also be done recursively
+ <braunr> what doyou call fs notification ?
+ <teythoon> and the information I need could just be stuffed into the same
+ mechanism
+ <teythoon> fs translators being notified of system shutdown
+ <braunr> right
+ <braunr> that gets a bit complicated because the kernel is also a
+ centralized component
+ <braunr> it knows every memory object and their pagers
+ <braunr> it manages all virtual memory
+ <braunr> there are two different issues here
+ <braunr> syncing memory and shutting down file systems
+ <braunr> the latter could be done recursively, yes
+ <braunr> i wonder if the former could be delegated to external pagers as
+ well
+ <braunr> teythoon: but that's not the focus of your work aiui, it would
+ take much time
+ <teythoon> sure, but missing an mtab file or better yet /proc/mounts could
+ be an issue for me, at least a cosmetic one, if not a functional one
+ <braunr> i understand
+ <teythoon> and hacking up a quick solution for that seemed like a good
+ exercise
+ <braunr> i suggest you discuss it with your mentors
+ <braunr> they might agree to a temporary centralized solution
+ <braunr> although i don't think it's much simpler than the recursive one
+ <teythoon> braunr: would that be implemented in libdiskfs and friends?
+ <braunr> teythoon: i'm not sure, it might be a generic fs operation
+ <braunr> libnetfs etc.. are also mount points
+ <teythoon> so where would it go if it was generic?
+ <braunr> libfshelp perhaps
+ <teythoon> translator startup is handled in start-translator-long.c, so in
+ case a startup is successful, I'd add it to a list?
+ <braunr> i'd say so, yes
+ <teythoon> would that cover all cases, passive and active translators?
+ <braunr> that's another question
+ <braunr> do we consider passive translators as mounted ?
+ <teythoon> ah, that was not what i meant
+ <braunr> i know
+ <braunr> but it's related
+ <teythoon> start b/c of accessing a passive one vs. starting an active one
+ using settrans
+ <braunr> start_translator_xxx only spawn active translators
+ <braunr> it's the same
+ <teythoon> ok
+ <braunr> the definition of a passive translator is that it starts the
+ active translator on access
+ <teythoon> yeah I can see how that wouldn't be hard to implement
+ <braunr> i think we want to include passive translators in the mount table
+ <braunr> so registration must happen before starting the active one
+ <teythoon> so it's a) keeping a list of active translators and b) add an
+ interface to query fs translators for this list and c) an interface to
+ query mtab style information?
+ <braunr> keeping a list of all translators attached
+ <braunr> and yes
+ <braunr> well
+ <braunr> a is easy
+ <braunr> b is the real work
+ <braunr> c would be procfs using b
+ <teythoon> oh? I thought recursing on the translators and querying info
+ would be separate operations?
+ <braunr> why so ?
+ <braunr> the point is querying recursively :)
+ <braunr> and when i say recursively, it's only a logical view
+ <teythoon> ok, yes, it can be implemented this way, so we construct the
+ list while recursing on the translators
+ <braunr> i think it would be better to implement it the way looking up a
+ node is done
+ <teythoon> in a loop, using a stack?
+ <braunr> iteratively
+ <braunr> a translator would provide information about itself (if
+ supported), and referrences to translators locally registered to it
+ <teythoon> could you point me to the node lookup?
+ <teythoon> ah, yes
+ <braunr> eg., you ask /, it tells you it's on /dev/hd0, read-write, with
+ options, and send rights to /home, /proc, etc..
+ <braunr> well rights, references
+ <braunr> it could be the path itself
+ <teythoon> rights as in a port to the translators?
+ <braunr> i think the path would be better but i'm not sure
+ <braunr> it would also allow you to check the permissions of the node
+ before querying
+ <teythoon> path would be nicer in the presence of stacked translators
+ <braunr> and obviously you'd have the path right away, no need to provide
+ it in the reply
+ <teythoon> true
+
+ <teythoon> braunr: if we want to list passive translators (and I agree, we
+ should), it isn't sufficient to touch libfshelp, as setting a passive
+ translator is not handled there, only the startup
+ <braunr> teythoon: doesn't mean you can't add something there that other
+ libraries will use
+ <braunr> so yes, not sufficient
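+
+A hedged sketch of the iterative lookup outlined above: start at "/",
+ask each filesystem translator for its own entry plus the translators
+registered below it, and keep going until the work list is empty.
+query_translator() and struct mtab_entry are purely illustrative
+stand-ins for whatever RPC (a fsys_get_translators style call, say)
+ends up providing the data.
+
+    #include <stdio.h>
+    #include <string.h>
+
+    #define MAX_PENDING 64
+    #define MAX_CHILDREN 16
+    #define PATH_LEN 128
+
+    struct mtab_entry
+    {
+      char device[PATH_LEN];    /* e.g. "/dev/hd0s1" or "none" */
+      char type[64];            /* e.g. "ext2fs" */
+      char options[256];        /* e.g. "rw,no-inherit-dir-group" */
+    };
+
+    /* Dummy stand-in for the real query: it only knows about "/" and
+       reports no registered children.  */
+    static int
+    query_translator (const char *path, struct mtab_entry *entry,
+                      char children[][PATH_LEN], int max_children)
+    {
+      (void) children;
+      (void) max_children;
+
+      if (strcmp (path, "/") != 0)
+        return -1;
+
+      strcpy (entry->device, "/dev/hd0s1");
+      strcpy (entry->type, "ext2fs");
+      strcpy (entry->options, "rw");
+      return 0;                 /* number of children */
+    }
+
+    /* Iteratively walk the tree of translators, appending one mtab
+       line per translator that answers the query.  */
+    static void
+    build_mtab (FILE *out)
+    {
+      char pending[MAX_PENDING][PATH_LEN];
+      int npending = 0;
+
+      strcpy (pending[npending++], "/");
+
+      while (npending > 0)
+        {
+          char path[PATH_LEN];
+          char children[MAX_CHILDREN][PATH_LEN];
+          struct mtab_entry entry;
+          int nchildren, i;
+
+          strcpy (path, pending[--npending]);
+          nchildren = query_translator (path, &entry, children,
+                                        MAX_CHILDREN);
+          if (nchildren < 0)
+            continue;           /* unreachable or uncooperative */
+
+          fprintf (out, "%s %s %s %s 0 0\n",
+                   entry.device, path, entry.type, entry.options);
+
+          for (i = 0; i < nchildren && npending < MAX_PENDING; i++)
+            strcpy (pending[npending++], children[i]);
+        }
+    }
+
+    int
+    main (void)
+    {
+      build_mtab (stdout);
+      return 0;
+    }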
+
+
+## IRC, freenode, #hurd, 2013-06-29
+
+ <teythoon> braunr: diskfs_S_fsys_set_options uses diskfs_node_iterate to
+ recurse on active translators if do_children is given
+ <teythoon> braunr: I wonder how fast that is in practice
+ <teythoon> braunr: if it's fast enough, there might not even be a need for
+ a new function in fsys.defs
+ <teythoon> and no need to keep a list of translators for that reason
+ <teythoon> braunr: if it's not fast enough, then diskfs_S_fsys_set_options
+ could use the list to speed this up
+ <braunr> teythoon: on all nodes ?
+ <teythoon> braunr: i believe so, yes, see libdiskfs/fsys-options.c
+ <braunr> teythoon: well, if it really is all nodes, you clearly don't
+ want that
+
+
+## IRC, freenode, #hurd, 2013-07-01
+
+ <teythoon> I meant to ask, the shiny new fsys_get_translators interface,
+ should it return the options for the queried translator or not?
+ <braunr> i don't think it should
+ <teythoon> ok
+ <braunr> let's walk through why it shouldn't
+ <teythoon> may I assume that the last argument returned by fsys_get_options
+ is the "source"?
+ <braunr> how would you know these options ?
+ <braunr> the source ?
+ <teythoon> I wouldn't actually
+ <braunr> yes, you wouldn't
+ <braunr> you'd have to ask the translators for that
+ <braunr> so the only thing you can do is point to them
+ <teythoon> well, the device to include in the mtab file
+ <braunr> and the client asks
+ <braunr> i don't know fsys_get_options tbh
+ <teythoon> well, both tmpfs and ext2fs print an appropriate value for
+ "device" as last argument
+ <braunr> looks like a bad interface to me
+ <braunr> options should be options
+ <braunr> there should be a specific call for the device
+ <braunr> but if everyone agrees with the options order, you can do it that
+ way for now i guess
+ <teythoon> one that could be used to recreate the "mount" using either
+ mount or settrans
+ <braunr> just comment it where appropriate
+ <teythoon> I thought that'd be the point?
+ <braunr> ?
+ <teythoon> % fsysopts tmp
+ <teythoon> /hurd/tmpfs --writable --no-inherit-dir-group --no-sync 48K
+ <braunr> where is the device ?
+ <teythoon> % settrans -ca tmp $(fsysopts tmp)
+ <braunr> 15:56 < teythoon> well, both tmpfs and ext2fs print an appropriate
+ value for "device" as last argument
+ <teythoon> 48K
+ <braunr> i don't see it
+ <braunr> really ?
+ <teythoon> yes
+ <braunr> what about ext2fs ?
+ <braunr> hm ok i see
+ <teythoon> % fsysopts /
+ <teythoon> ext2fs --writable --no-inherit-dir-group --sync=10
+ --store-type=typed device:hd0s1
+ <braunr> i don't think you should consider that as devices
+ <braunr> but really translator specific options
+ <pinotree> agree
+ <teythoon> I don't ;)
+ <teythoon> b/c the translator calling convention is hardcoded in the mount
+ utility
+ <braunr> ?
+ <teythoon> I think it's reasonable to assume that this mapping can be
+ reversed
+ <pinotree> theoretically you can write a translator that takes no
+ arguments, but just options
+ <braunr> the 48K string for tmpfs is completely meaningless
+ <braunr> in fstab, it should be none
+ <pinotree> "tmpfs"
+ <braunr> the linux equivalent is the size option
+ <braunr> no, none
+ <braunr> it's totally ignored
+ <braunr> and it's recommended to set none rather than the type to avoid
+ confusion
+ <teythoon> u sure?
+ <teythoon> % settrans -cga tmp /hurd/tmpfs --mode=666 6M
+ <teythoon> % settrans -cga tmp /hurd/tmpfs --mode=666 6M
+ <teythoon> % fsysopts tmp
+ <teythoon> /hurd/tmpfs --writable --no-inherit-dir-group --no-sync 6M
+ <braunr> i've not explained myself clearly
+ <braunr> it's not ignored by the translator
+ <braunr> but in fstab, it should be in the options field
+ <braunr> it's not the source
+ <braunr> clearly not
+ <teythoon> ah
+ <braunr> now i'm talking about fstab, but iirc the format is similar in
+ mtab/mounts
+ <pinotree> close, but not the same
+ <braunr> yes, close
+ <teythoon> ok, so I'll put a method into libfshelp so that translators can
+ explicitly set a device and patch all existing translators to do so?
+ <braunr> teythoon: what i meant is that, for virtual file systems (actually
+ file systems with no underlying devices), the device field is normally
+ ignored
+ <braunr> teythoon: why do you need that for exactly
+ <teythoon> right
+ <pinotree> do they even have a "device" field?
+ <braunr> (i can see why but i'd like more visibility)
+ <braunr> pinotree: not yet
+ <braunr> pinotree: that's what he wants to add
+ <braunr> but i'd like to see if there is another way to get the information
+ <braunr> 16:05 < braunr> teythoon: why do you need that for exactly
+ <teythoon> well if I'm constructing a mtab entry I need a value for the
+ device field
+ <braunr> do we actually need it to be valid ?
+ <teythoon> not necessarily I guess
+ <braunr> discuss it with your mentors then
+ <youpi> it has to be valid for e2fsck checks etc.
+ <braunr> doesn't e2fsck check fstab actually ?
+ <youpi> i.e. actually for the cases where it's trivial
+ <youpi> fstab doesn't tell it whether it's mounted
+ <youpi> I mean fsck checking whether it's mounted
+ <youpi> not fsck -a
+ <braunr> oh
+ <braunr> couldn't we ask the device instead ?
+ <braunr> looks twisted too
+ <youpi> that'd mean patching a lot of applications which do similar checks
+ <braunr> yes
+ <braunr> teythoon: propose an interface for that with your mentors then
+ <teythoon> yeah, but couldn't you lay it out a little, I mean would it be
+ one procedure or like three?
+ <braunr> 16:04 < teythoon> ok, so I'll put a method into libfshelp so that
+ translators can explicitly set a device and patch all existing
+ translators to do so?
+ <teythoon> ok
+ <braunr> why three ?
+ <teythoon> no, I mean when adding stuff to fsys.defs
+ <braunr> i understood that
+ <braunr> but why three ? :)
+ <teythoon> it'd be more generic
+ <braunr> really ?
+ <braunr> please show a quick example of what you have in mind
+ <teythoon> i honestly don't know, thus I'm asking ;)
+ <braunr> well first, this device thing bothers me
+ <braunr> when you look at how we set up our ext2fs translators, you can see
+ they use device:xxx
+ <braunr> and not /dev/xxx
+ <braunr> but ok, let's assume it's harmless
+ <teythoon> ok, but isn't the first way actually better?
+ <braunr> i think it ends up being the same
+ <braunr> ideally, that's what we want to use as device path
+ <teythoon> but you can recreate a storeio translator using the device:xxx
+ info, the node is useless for that
+ <braunr> so that we don't need to explicitely set it
+ <braunr> ?
+ <braunr> what do you mean ?
+ <teythoon> well, fsysopts / currently tells me device:hd0s1
+ <braunr> for /, there isn't much choice
+ <braunr> /dev isn't there yet
+ <teythoon> ah, got it
+ <teythoon> that's why it differs...
+ <braunr> differs ?
+ <braunr> from what ?
+ <braunr> other ext2fs translators are set the same way by the debian
+ installer for example
+ <teythoon> % fsysopts /media/scratch
+ <teythoon> /hurd/ext2fs --writable --no-inherit-dir-group /dev/hd1s1
+ <teythoon> here it uses the path to the node
+ <braunr> that's weird
+ <braunr> was that done by the debian installer ?
+ <teythoon> ah no, that was me
+ <braunr> :p
+ <braunr> $ fsysopts /home
+ <braunr> /hurd/ext2fs --writable --no-inherit-dir-group --store-type=device
+ hd0s6
+ <braunr> so as you can see, it's not that simple to infer the device path
+ <teythoon> oho, yet another way ;)
+ <teythoon> right then
+ <pinotree> isn't device:hd0s1 a shortcut for specifying the store type, as
+ done with --store-type=device hd0s1?
+ <braunr> but perhaps we don't need to
+ <braunr> yes it is
+ <pinotree> iirc it's something libstore does, per-store prefixes
+ <braunr> ah that sucks
+ <braunr> teythoon: you may need to normalize those strings
+ <braunr> so that they match what's in fstab
+ <braunr> i.e. unix /dev paths
+ <braunr> otherwise e2fsck still won't be able to find the translators
+ mounting the device
+ <braunr> well, if it's mounted actually
+ <braunr> it just needs to find the matching line in mtab aiui
+ <braunr> so perhaps a libfshelp function for that, yes
+ <teythoon> braunr: so you suggest adding a normalizing function to
+ libfshelp that creates a /dev/path?
+ <braunr> yes
+ <braunr> used by the call you intend to add, which returns that device
+ string as found in fstab
+ <teythoon> found in fstab? so this would only work for translators managed
+ by fstab?
+ <braunr> no
+ <teythoon> ah
+ <teythoon> a string like the ones found in fstab?
+ <braunr> yes
+ <braunr> so that fsck and friends are able to know whether a device is
+ mounted or not
+ <braunr> i don't see any other purpose for that string in mtab
+ <braunr> you'd take regular paths as they are, convert device:xxx to
+ /dev/xxx, and return "none" for the rest i suppose
+ <teythoon> ok
+ <braunr> i'm not even sure it's right
+ <braunr> youpi: are you sure it's required ?
+ <teythoon> well it's a start and I think it's not too much work
+ <braunr> aiui, e2fsck may simply find the mount point in fstab, and ask the
+ translator if it's mounted
+ <teythoon> we can refine this later on maybe?
+ <braunr> or rather, init scripts, using mountpoint, before starting e2fsck
+ <braunr> teythoon: sure
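+
+A minimal sketch of the normalization helper discussed above, assuming a new
+libfshelp-style function (the name and exact behaviour are illustrative and
+not part of any existing API): absolute paths are passed through, `device:xxx`
+arguments are rewritten to `/dev/xxx`, and anything else (such as tmpfs' size
+argument) falls back to "none".
+
+    /* Illustrative sketch only; not an existing libfshelp function.  */
+    #define _GNU_SOURCE
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+
+    /* Map a translator argument to a device string as it would appear in
+       fstab/mtab.  A bare store name like "hd0s6" (as printed for /home
+       above) would need additional handling.  */
+    char *
+    map_device_to_path (const char *arg)
+    {
+      if (arg == NULL)
+        return strdup ("none");
+
+      if (arg[0] == '/')
+        return strdup (arg);            /* already a path, e.g. /dev/hd1s1 */
+
+      if (strncmp (arg, "device:", 7) == 0)
+        {
+          char *path;
+          if (asprintf (&path, "/dev/%s", arg + 7) < 0)
+            return NULL;
+          return path;                  /* device:hd0s1 -> /dev/hd0s1 */
+        }
+
+      return strdup ("none");           /* e.g. "6M" for tmpfs */
+    }
+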
+ <teythoon> there's this mountpoint issue... I need to run fsysopts /
+ --update early in the boot process
+ <teythoon> otherwise the device ids returned by stat(2)ing / are wrong and
+ mountpoint misbehaves
+ <teythoon> i guess b/c it's the rootfs
+ <braunr> device ids ?
+ <teythoon> % stat / | grep Device
+ <teythoon> Device: 3h/3d Inode: 2 Links: 22
+ <braunr> do you mean the major/minor identifiers ?
+ <teythoon> I do. if I don't do the --update i get seemingly random values
+ <braunr> i guess that's expected
+ <braunr> we don't have major/minor values
+ <braunr> well, they're emulated
+ <teythoon> well, if that's fixable, that'd be really nice ;)
+ <braunr> we'll never have major/minor values
+ <teythoon> yeah, I understand that
+ <braunr> but they could be fixed by MAKEDEV when creating device nodes
+ <teythoon> but not having to call fsys_set_options on the rootfs to get the
+ emulation up to speed
+ <braunr> try doing it from grub
+ <braunr> not sure it's possible
+ <braunr> but worth checking
+ <teythoon> by means of an ext2fs flag?
+ <braunr> yes
+ <braunr> if there is one
+ <braunr> i don't know the --update flag, is it new from your work ?
+ <teythoon> braunr: no, it's been there before. -oremount gets mapped to
+ that
+ <braunr> it's documented by fsysopts, but not by the ext2fs translators
+ <teythoon> libdiskfs source says something about flushing buffers iirc
+ <braunr> -s
+ <braunr> what does it do ?
+ <braunr> teythoon: ok
+ <teythoon> braunr: so the plan is to automatically generate a device path
+ from the translators argz vector but to provide the functionality so
+ translators can set a more appropriate value? did I get the last part of
+ the discussion right?
+ <braunr> not set, return
+ <teythoon> yeah return from the procedure but settable using libfshelp?
+ <braunr> why settable ?
+ <braunr> you'd have a fsys call to obtain the dev string, and the server
+ side would call libfshelp on the fly to obtain a normalized value and
+ return it
+ <teythoon> ah, make a function overrideable that returns an appropriate
+ response?
+ <braunr> overrideable ?
+ <teythoon> like netfs_append_args
+ <braunr> you wouldn't change the command line, no
+ <teythoon> isn't that done using weak references or something?
+ <teythoon> no I know
+ <braunr> sorry i'm lost then
+ <teythoon> never mind, I'll propose a patch early to get your feedback
+ <youpi> braunr: am I sure that _what_ is required?
+ <youpi> the device?
+ <youpi> e2fsck surely needs it, yes
+ <braunr> a valid device path, yes
+ <youpi> it can't rely only on fstab
+ <braunr> yes
+ <youpi> since users may mount things by hand
+ <braunr> i've used strace on it and it does perform lookups there
+ <braunr> (although i also saw uuid magic that i guess wouldn't work yet on
+ the hurd)
+
+
+## IRC, freenode, #hurd, 2013-07-03
+
+ <teythoon> I added a procedure to fsys.defs, added a server stub to my
+ tmpfs translator and wrote a simple client, but something hasn't picked
+ up the new message yet
+ <teythoon> % ./mtab tmp
+ <teythoon> ./mtab: get_translator_info: (ipc/mig) bad request message ID
+ <teythoon> I guess it's libhurduser.so from glibc, not sure though...
+ <braunr> glibc would only have the client calls
+ <braunr> what is "% ./mtab tmp" ?
+ <teythoon> mtab is my mtab tool/soon to be a translator testing thing, tmp
+ is an active tmpfs with the appropriate server stub
+ <braunr> so mtab has the client call, right ?
+ <teythoon> yes
+ <braunr> then tmpfs doesn't
+ <teythoon> so what to do about it?
+ <teythoon> i set LD_LIBRARY_PATH to my hurd builds lib dir, is that
+ preserved by settrans -a?
+ <pinotree> not really
+ <braunr> not at all
+ <braunr> there is a wiki entry about that iirc
+ <pinotree> http://darnassus.sceen.net/~hurd-web/hurd/debugging/translator/
+ <teythoon> yeah, I read it too once
+ <teythoon> ah
+ <braunr> on the other hand, using export to set the environment should do
+ the work
+ <teythoon> yes, that did the trick, thanks :)
+ * teythoon got his EOPNOTSUPP... *nomnomnom
+ <braunr> ?
+ <braunr> same error ?
+ <teythoon> well I stubbed it out
+ <braunr> oh
+ <teythoon> no, that's what I've been expecting ;)
+ <pinotree> great
+ <braunr> :)
+ <braunr> yes that's better than "mig can't find it"
+ <teythoon> braunr: in that list of active and passive translators that will
+ have to be maintained, do you expect it should carry more information
+ other than the relative path to that translator?
+ <braunr> like what ?
+ <teythoon> dunno, maybe like a port to any active translator there
+ <teythoon> should we care if any active translator dies and remove the
+ entry if there's no passive translator that could restart it again?
+ <braunr> don't add anything until you see it's necessary or really useful
+ <braunr> yes
+ <braunr> think of something like sshfs
+ <braunr> when you kill it, it's not reported by mount any more
+ <teythoon> well, for a dynamically allocated list of strings I could use
+ the argz stuff, but if we'd ever add anything else, we'd need a linked
+ list or something, maybe a hash table
+ <teythoon> yes, I thought that'd be useful
+ <braunr> use libihash for no
+ <braunr> now
+ <teythoon> braunr: but what would I use as keys? the relative path should
+ be unique (unless translators are stacked... hmmm), but that's the value
+ I'd like to store and ihash keys are pointers
+ <teythoon> stacked translators are a kinda interesting case for mtab
+ anyways...
+ <braunr> why not store the string address ?
+ <braunr> i suppose that, for stacked translators, the code querying
+ information would only return the topmost translator
+ <braunr> since this is the one which matters for regular clients (if i'm
+ right)
+ <teythoon> wouldn't that map strings that are equal but stored at different
+ locations to different values?
+ <teythoon> that'd defeat the point
+ <teythoon> I suppose so, yes
+ <braunr> then add a layer that looks for existing strings before adding
+ <braunr> the list should normally be small so a linear lookup is fine
+ <teythoon> yeah sure, but then there's little advantage of using ihash in
+ the first place, isn't it?
+ <braunr> over what ?
+ <teythoon> over not using it at all
+ <braunr> how would you store the list then ?
+ <teythoon> it's either ll or ll+ihash
+ <braunr> uh no
+ <braunr> let me check
+ <braunr> there is ihash_iterate
+ <braunr> so you don't need a linked list
+ <teythoon> so how do I store my list of strings to deduplicate the keys?
+ <braunr> you store pointers
+ <braunr> and on addition, you iterate over all entries, making sure none
+ matches the new one
+ <braunr> and if it does, you replace it i guess
+ <braunr> depending on how you design the rest
+ <teythoon> in a dynamically allocated region of memory?
+ <braunr> i don't understand
+ <braunr> your strings should be dynamically allocated, yes
+ <teythoon> no the array of char *
+ <braunr> your data structure being managed by libihash, you don't care
+ about allocation
+ <braunr> what array ?
+ <teythoon> ah, got it...
+ <teythoon> right.
+ <braunr> there is only one structure here, an ihash of char *
+ <teythoon> yes, I got the picture ;)
+ <braunr> goo
+ <braunr> d
+ <braunr> actually, the lookup wouldn't be linear since usually, hash tables
+ have stale entries
+ <teythoon> heh... what forest?!?
+ <braunr> but that's ok
+ <braunr> teythoon: ?
+ <teythoon> the one I couldn't make out b/c of all the trees...
+ <braunr> ?
+ <teythoon> ah, it's not important. there is this saying over here, not sure
+ if there's an english equivalent
+ <braunr> ok got it
+ <braunr> we have the same in french
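+
+The string set braunr describes could be kept in a libihash table roughly as
+follows.  This is a sketch from memory of the `<hurd/ihash.h>` interface, not
+code from the actual mtab work, and the function name is made up.
+
+    /* Deduplicated set of translator paths: the string pointer doubles as
+       the ihash key, so a linear scan over the entries avoids inserting an
+       equal string twice.  */
+    #include <errno.h>
+    #include <string.h>
+    #include <hurd/ihash.h>
+
+    static struct hurd_ihash translators =
+      HURD_IHASH_INITIALIZER (HURD_IHASH_NO_LOCP);
+
+    error_t
+    record_translator (const char *path)
+    {
+      HURD_IHASH_ITERATE (&translators, value)
+        if (strcmp ((const char *) value, path) == 0)
+          return 0;                     /* already known */
+
+      char *copy = strdup (path);
+      if (copy == NULL)
+        return ENOMEM;
+
+      /* The pointer doubles as the key.  */
+      return hurd_ihash_add (&translators, (hurd_ihash_key_t) copy, copy);
+    }
+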
+ <teythoon> I ran into a problem with my prototype
+ <teythoon> if a translator is set in e.g. diskfs_S_file_set_translator,
+ how do I get the path to that node?
+ <teythoon> I believe there cannot be a way to do that, b/c the mapping is
+ not bijective
+ <braunr> it doesn't have to be
+ <teythoon> ok, so how do I get *a* path for this node?
+ <braunr> that's another question
+ <braunr> do you see how the node is obtained ?
+ <braunr> np = cred->po->np;
+ <teythoon> yes
+ <braunr> the translation occurred earlier
+ <braunr> you need to find where
+ <braunr> then perhaps, you'll need to carry the path along
+ <braunr> or if you're lucky, it will still be there somewhere
+ <teythoon> the translation from path to node?
+ <braunr> yes
+ <teythoon> doesn't that happen in the client? and the client hands a file_t
+ to the file_set_translator routine?
+ <braunr> the relative lookup can't happen in the client
+ <braunr> the server can (and often does) retain information between two
+ RPCs
+ <teythoon> uh, I can access information from a previous rpc? is that
+ considered safe?
+ <braunr> think of open() then read()
+ <braunr> a simple int doesn't carry enough information
+ <braunr> that's why it's a descriptor
+ <teythoon> ah, the server retains some state, sure
+ <braunr> what it refers to is the state retained between several calls
+ <braunr> the object being invoked by clients
+ <braunr> teythoon: what is the "passive" parameter passed to
+ diskfs_S_file_set_translator ?
+ <teythoon> braunr: argz vector of the passive translator
+ <braunr> so it is a name
+ <braunr> but we also want active translators
+ <braunr> and what is active ?
+ <teythoon> not the name of the node though
+ <teythoon> active is the port (?) to the active translator
+ <teythoon> I guess
+ <braunr> fsys_t, looks that way yes
+ <braunr> i suppose you could add the path to the peropen structure
+ <teythoon> ok
+ <braunr> see diskfs_make_peropen
+ <teythoon> braunr: but translation happens in dir_lookup
+ <teythoon> in all places I've seen diskfs_make_peropen used, the path is
+ not available
+ <teythoon> why did you point me to diskfs_make_peropen?
+ <teythoon> s/dir_lookup/diskfs_lookup/
+ <teythoon> diskfs_lookup operates on struct node, so the path would have to
+ be stored there, right?
+ <braunr> teythoon: dir_lookup should call diskfs_make_peropen
+ <braunr> at least diskfs_S_dir_lookup does
+ <braunr> and the path is present there
+ <teythoon> braunr: right
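+
+The change being settled on here might look roughly like the following.  It is
+a simplified sketch: the real `struct peropen` lives in libdiskfs and has many
+more members, and the helper shown merely stands in for the copying that would
+happen when `diskfs_S_dir_lookup` creates the peropen.
+
+    #include <stdlib.h>
+    #include <string.h>
+
+    struct peropen
+    {
+      /* ... existing members (node pointer, open flags, refcount, ...) ... */
+      char *path;       /* relative path used to reach the node, or NULL */
+    };
+
+    /* Hypothetical helper: remember the name the client looked up, so a
+       later file_set_translator on this peropen knows its path.  */
+    static int
+    peropen_set_path (struct peropen *po, const char *path)
+    {
+      po->path = path ? strdup (path) : NULL;
+      return (path && po->path == NULL) ? -1 : 0;
+    }
+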
+
+ <teythoon> hrm... I added a path field to struct peropen and initialize it
+ properly in diskfs_make_peropen, but some bogus values keep creeping in
+ :/
+ <braunr> first of all, make it a dynamically allocated string
+ <teythoon> it is
+ <braunr> not a fixed sized embedded array
+ <braunr> good
+ <teythoon> yes
+ <braunr> if you really need help debugging what's happening, feel free to
+ post your current changes somewhere
+ <teythoon> there is a struct literal in fsys-getroot.c, but i fixed that as
+ well
+ <teythoon> % ./mtab tmp
+ <teythoon> none tmp ../tmpfs/tmpfs writable,no-inherit-dir-group,no-sync 0
+ 0
+ <teythoon> none tmp/bar ../tmpfs/tmpfs
+ writable,no-inherit-dir-group,no-sync 0 0
+ <teythoon> none tmp/foo ../tmpfs/tmpfs
+ writable,no-inherit-dir-group,no-sync 0 0
+ <teythoon> none tmp/foo/bar ../tmpfs/tmpfs
+ writable,no-inherit-dir-group,no-sync 0 0
+ <teythoon> :)
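+
+For reference, the mtab lines shown above can be assembled from a translator's
+argz vector along these lines.  `argz_next` and `argz_stringify` are the real
+glibc functions; the surrounding helper is only a sketch (the real tool also
+strips the leading "--" from each option, omitted here for brevity).
+
+    #define _GNU_SOURCE
+    #include <argz.h>
+    #include <stdio.h>
+    #include <string.h>
+
+    /* Turn an argz vector like "/hurd/tmpfs\0--writable\0--no-sync\0..."
+       into one "<device> <mount point> <fs type> <options> 0 0" line.  */
+    void
+    print_mtab_entry (FILE *out, const char *mountpoint, const char *device,
+                      char *argz, size_t argz_len)
+    {
+      const char *fstype = argz;        /* first element: translator path */
+      char *options = argz_next (argz, argz_len, argz);
+
+      if (options)
+        {
+          size_t options_len = argz_len - (options - argz);
+          argz_stringify (options, options_len, ',');  /* join with commas */
+          fprintf (out, "%s %s %s %s 0 0\n",
+                   device, mountpoint, fstype, options);
+        }
+      else
+        fprintf (out, "%s %s %s defaults 0 0\n", device, mountpoint, fstype);
+    }
+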
+
+
+## IRC, freenode, #hurd, 2013-07-10
+
+ <teythoon> btw, I read getcwd.c and got the idea
+ <teythoon> however this situation is different afaict
+ <teythoon> getcwd has a port to the current working directory, right?
+ <teythoon> so they can do open_dir with .. as relative path
+ <teythoon> but all I've got is a port referencing the node the translator
+ is being attached to
+ <teythoon> s/open_dir/dir_lookup/
+ <teythoon> and that is not necessarily a directory, so dir_lookup fails
+ with not a directory
+ <teythoon> as far as I can see it is not possible to get the directory a
+ node is in from a port referencing that node
+ <teythoon> dir_lookup has to be handled by all nodes, not just directories
+ <teythoon> but file nodes only support "looking up" the empty string
+ <teythoon> not empty, but null:
+ <teythoon> This call is required to be supported by all files (even
+ non-directories) if the filename is null, and should function in that
+ case as a re-open of the file. */
+ <braunr> why do you want the directory ?
+ <braunr> 10:40 < teythoon> as far as I can see it is not possible to get
+ the directory a node is in from a port referencing that node
+ <teythoon> to readdir(3) it and figure out the name of the node the
+ translator is bound to
+ <teythoon> similar to what getcwd does
+ <braunr> that's out of the question
+ <teythoon> wasn't that what youpi was suggesting?
+ <braunr> you may have a lot of nodes in there, such a lookup shouldn't be
+ done
+ <braunr> i didn't see that detail
+ <teythoon> "│ Concerning storing the path, it's a bit sad to have to do
+ that, and
+ <teythoon> │ it'll become wrong if one moves the mount points. Another
+ way would
+ <teythoon> │ be to make the client figure it out by itself from a port to
+ the mount
+ <teythoon> │ point, much like glibc/sysdeps/mach/hurd/getcwd.c. It'll be
+ slower, but
+ <teythoon> │ should be safer. The RPC would thus return an array of
+ ports to the
+ <teythoon> │ mount points instead of an array of strings.
+ <braunr> yes i remember that
+ <braunr> but i didn't understand well how getcwd work
+ <braunr> s
+ <braunr> another scalability issue
+ <braunr> not a big one though, we rarely have translators in directories
+ with thousands of nodes
+ <braunr> so why not
+ <braunr> teythoon: do it as youpi suggested
+ <braunr> well if you can
+ <braunr> eh
+ <braunr> if not, i don't know
+ <braunr> 10:47 < teythoon> │ it'll become wrong if one moves the mount
+ points. Another way would
+ <teythoon> yes, I know... :/
+ <teythoon> well, I'm not even sure it is possible to get the directory a
+ node is in from the port referencing the node
+ <teythoon> as in, I'm not sure if the information is even there
+ <teythoon> b/c a filesystem is a tree, directories are nodes and files are
+ leaves
+ <teythoon> all non-leaf nodes reference their parent to allow traversing
+ the tree starting from any directory
+ <teythoon> but why would a leaf reference its parent(s - in case of
+ hardlinks)?
+ <braunr> uh, for the same reason ?
+ <teythoon> sure, it would be nice to do that, but I dont think this is
+ possible on unixy systems
+ <braunr> ?
+ <teythoon> you cannot say fchdir(2) to a fd that references a file
+ <braunr> do you mean /path/to/file/../ ?
+ <teythoon> yes
+ <teythoon> only that /path/to/file is given as fd or port
+ <braunr> when i pasted
+ <braunr> 10:49 < braunr> 10:47 < teythoon> │ it'll become wrong if one
+ moves the mount points. Another way would
+ <braunr> i was actually wondering if it was true
+ <teythoon> ah
+ <braunr> why can't the path be updated at the same time ?
+ <braunr> it's a relative path anyway
+ <braunr> completely managed by the parent translator
+ <teythoon> ah
+ <teythoon> right
+ <teythoon> it's still kind of hacky, but I cannot see how to do this
+ properly
+ <braunr> hacky ?
+ <teythoon> but yes, updating the path should work I guess
+ <teythoon> or sad
+ <braunr> what i find hacky is to set translators in two passes
+ <braunr> otherwise we'd only keep the translator paths
+ <braunr> not all paths
+ <teythoon> true
+ <braunr> but then, it only concerns open nodes
+ <braunr> and again, there shouldn't be too many of them
+ <braunr> so actually it's ok
+ <teythoon> braunr: I understand the struct nodes are cached in libdiskfs,
+ so wouldn't it be easier to attach the path to that struct instead of
+ struct peropen so that all peropen objects reference the same node
+ object?
+ <teythoon> so that the path can be updated if anyone dir_renames it
+ <teythoon> *all peropen objects derived from the same file name that is
+ <braunr> teythoon: i'm not sure
+ <braunr> nodes could be real nodes (i.e. inodes)
+ <braunr> there can be several paths for the same inode
+ <teythoon> braunr: I'm aware of that, but didn't we agree the other day
+ that any path would do?
+ <braunr> i don't remember we did
+ <braunr> i don't know the details well, but i don't think setting a
+ translator on a hard link should set the translator at the inode level
+ <braunr> on the other hand, if a new inode is created to replace the
+ previous one (or stack over it), then storing the path there should be
+ fine
+ <teythoon> braunr: I don't think I can update the paths if they're stored
+ in the peropen struct
+ <teythoon> how would I get a reference to all those peropen objects?
+ <braunr> ?
+ <braunr> first, what's the context when you talkb about updating paths ?
+ <teythoon> well, youpi was concerned about renaming a mount point
+ <teythoon> and you implied that this could be managed
+ <braunr> can we actually do that btw ?
+ <teythoon> what?
+ <braunr> renaming a mount point
+ <teythoon> yep, just tried
+ <braunr> i mean, on a regular unix system like linux
+ <braunr> $ mv test blah
+ <braunr> mv: cannot move `test' to `blah': Device or resource busy
+ <braunr> (using sshfs so YMMV)
+ <pinotree> do you have anything (shells, open files, etc) inside it?
+ <braunr> no
+ <braunr> i'll try with an empty loop-mounted ext4
+ <teythoon> I was testing on the Hurd, worked fine there even with a shell
+ inside
+ <braunr> same thing
+ <braunr> i consider it a bug
+ <braunr> we may want to check what posix says about it
+ <teythoon> o_O
+ <braunr> and decide not to support renaming
+ <teythoon> why?
+ <pinotree> start a discussion in ml, maybe roland can chime in
+ <braunr> it complicates things
+ <braunr> ah yes
+ <teythoon> sure, but I can move or rename a directory, why should it be
+ different with a mount point?
+ <braunr> because it's two of them
+ <braunr> they're stacked
+ <braunr> if we do want to support that, we must be very careful about
+ atomically updating all the stack
+ <teythoon> ok
+ <teythoon> braunr: I'm trying to detect dying translators to remove them
+ from the list of translators
+ <teythoon> what port can I use for that purpose?
+ <teythoon> if I use the bootstrap port, can I then use the same method as
+ init/init.c uses? just defining a do_mach_notify_dead_name function and
+ the multiplexer will call this?
+ <braunr> teythoon: possibly
+ <teythoon> braunr: we'll see shortly...
+ <teythoon> I get KERN_INVALID_CAPABILITY indicating that my bootstrap port
+ is invalid
+ <teythoon> when calling mach_port_request_notification to get the dead name
+ notification I mean
+ <braunr> is the translator already started when you do that ?
+ <teythoon> yes, at least I think so, I'm hooking into
+ diskfs_S_file_set_translator and that gets an active translator's port
+ <teythoon> also the mach docs suggest that the notification port is
+ invalid, not the name port referencing the translator
+ <braunr> i guess it shouldn't
+ <braunr> oh
+ <braunr> please show the code
+ <braunr> but beware, if the translator is started, assume it could die
+ immediately
+ <teythoon> braunr: http://paste.debian.net/15371/ line 87
+ <braunr> teythoon: notify can't be bootstrap
+ <braunr> what do you have in mind when writing this ?
+ <braunr> i'm not sure i follow
+ <teythoon> I want to be notified if an active translator goes away to
+ remove it from the list of translators
+ <braunr> ok but then
+ <braunr> create a send-once right
+ <braunr> and wait on it
+ <braunr> also, why do you want to be notified ?
+ <braunr> isn't this already done ?
+ <braunr> or can't do it lazily on access attempt ?
+ <braunr> +you
+ <teythoon> in the client?
+ <braunr> in the parent server
+ <braunr> what happens currently when a translator dies
+ <braunr> is the parent notified ?
+ <braunr> or does it give an invalid right ?
+ <teythoon> ah, i think so
+ <braunr> then you don't need to do it again
+ <teythoon> right, I overlooked that
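+
+Although the conclusion here is that the parent already notices a dying
+translator, the notification request discussed above would look roughly like
+this.  `mach_port_request_notification` is the real Mach call; the receive
+right NOTIFY and the demuxer handling `do_mach_notify_dead_name` are assumed
+to be set up elsewhere.
+
+    #include <mach.h>
+    #include <mach/notify.h>
+
+    /* Ask for a dead-name notification on the active translator's port,
+       delivered as a send-once message to NOTIFY when ACTIVE dies.  */
+    kern_return_t
+    watch_translator (mach_port_t active, mach_port_t notify)
+    {
+      mach_port_t previous = MACH_PORT_NULL;
+      kern_return_t kr;
+
+      kr = mach_port_request_notification (mach_task_self (), active,
+                                           MACH_NOTIFY_DEAD_NAME, 0, notify,
+                                           MACH_MSG_TYPE_MAKE_SEND_ONCE,
+                                           &previous);
+      if (kr == KERN_SUCCESS && previous != MACH_PORT_NULL)
+        mach_port_deallocate (mach_task_self (), previous);
+      return kr;
+    }
+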
+
+
+## IRC, freenode, #hurd, 2013-07-12
+
+ <teythoon> recursively traversing all translators from / turns out to be
+ more dangerous than I expected
+ <teythoon> ... if done by a translator bound somewhere below /...
+ <teythoon> my interpretation is that the mtab translator tries to talk to
+ itself and deadlocks
+ <teythoon> (and as a side effect the whole system kinda just stops...)
+
+
+## IRC, freenode, #hurd, 2013-07-15
+
+ <youpi> teythoon: did you discuss with braunr about returning port vs path
+ in fsys_get_children?
+ <teythoon> youpi: we did
+ <teythoon> as I wrote I looked at the getcwd source you pointed me at
+ <teythoon> and I started to code up something similar
+ <teythoon> but as far as I can see there's no way to tell from a port
+ referencing a file the directory this file is located in
+ <youpi> ah, right, there was a [0] mail
+ <youpi> teythoon: because it doesn't have a "..", right
+ <teythoon> about Neal's concerns, he's right about not covering passive
+ translators very well
+ <teythoon> but the solution he proposed was similar to what I tried to do
+ first
+ <youpi> I don't like half-covering passive translators at all, to be honest
+ :)
+ <youpi> either covering them completely, or not at all, would be fine
+ <teythoon> and then braunr convinced me that the "recursive" approach is
+ more elegant and hurdish, and I came to agree with him
+ <teythoon> youpi: one could scan the filesystem at translator startup and
+ populate the list
+ <youpi> by "Neal's solution", you mean an mtab registry?
+ <teythoon> yes
+ <braunr> so, let's see what linux does when renaming parent directories
+ <teythoon> mount points you mean?
+ <youpi> teythoon: browsing the whole filesystem just to find passive
+ translators is costly
+ <youpi> teythoon, braunr: and that won't prevent the user from unexpectedly
+ starting other translators at will
+ <braunr> scary
+ <teythoon> youpi: but that requires the privilege to open the device
+ <youpi> the fact that a passive translator is set is nothing more than a
+ user having the intent of starting a translator
+ <braunr> linux retains the original path in the mount table
+ <youpi> heh
+ <teythoon> youpi: any unprivileged user can trigger a translator startup
+ <youpi> sure, but root can do that too
+ <youpi> and expect the system to behave nicely
+ <teythoon> but if I'm root and want to fsck something, I won't start
+ translators accessing the device just before that
+ <teythoon> but if there's a passive translator targeting the device,
+ someone else might do that
+ <youpi> root does not always completely control what he's doing
+ <youpi> linux for instance does prevent from mounting a filesystem being
+ checked
+ <teythoon> but still, including passive translators in the list would at
+ least prevent anyone starting a translator by accident, isn't that worth
+ doing then?
+ <youpi> if there's a way to prevent root too, that's better than having a
+ half-support for something which we don't necessarily really want
+ <youpi> (i.e. an exclusive lock on the underlying device)
+ <teythoon> right, that would also do the trick
+ <teythoon> btw, some programs or scripts seem to hardcode /proc/mounts and
+ procfs and I cannot bind a translator to /proc/mounts since it is
+ read-only and the node does not exist
+ <kilobug> IMHO automatically starting translators is a generic feature, and
+ passive translator is just a specific instance of it; but we could very
+ well have, like an "autofs" that automatically start translators in tar
+ archives and iso images, allowing to cd into any tar/iso on the system;
+ implementing such things is part of the Hurd flexibility, the "core
+ system" shouldn't be too aware on how translators are started
+ <youpi> so in the end, storing where the active translator was started
+ first seems okayish according to what linux has been exposing for decades
+ <youpi> kilobug: indeed
+ <teythoon> it could serve a mounts node with a passive translator by default,
+ or a link to /run/mtab, or a simple file so we could bind a translator to
+ that node
+ <youpi> I'd tend to think that /proc/mounts should be a passive translator
+ and /run/mtab / /etc/mtab a symlink to it
+ <youpi> not being able to choose the translator is a concern however
+ <teythoon> ok, I'll look into that
+ <youpi> it could be an empty file, and people be able to set a translator
+ on it
+ <teythoon> if it had a passive translator, people still could bind their
+ own translator to it later on, right?
+ <teythoon> afaics the issue currently is mostly, that there is no mounts
+ node and it is not possible to create one
+ <youpi> right
+ <teythoon> cool
+ <youpi> so with the actual path, you can even check for caller's permission
+ to read the path
+ <youpi> i.e. not provide any more information than the user would be able
+ to get from browsing by hand
+ <teythoon> sure, that concern of Neal's is easy to address
+ <youpi> I'm not so much concerned by stale paths being shown in mtab
+ <youpi> the worst that can happen is a user not being able to umount the
+ path
+ <youpi> but he can settrans -g it
+ <youpi> (which he can't on linux ;) )
+ <teythoon> yes, and the device information is still valid
+ <youpi> yes
+ <braunr> despite the parent dir being renamed, linux is still able to
+ umount the new path
+ <teythoon> and so is our current umount
+ <braunr> good
+ <teythoon> (if one uses the mount point as argument)
+ <braunr> what's the current plan concerning /proc/mounts ?
+ <teythoon> serving a node with a passive translator record
+ <braunr> ?
+ <teythoon> so that /hurd/mtab / is started on access
+ <braunr> i mean, still planning on using the recursive approach instead of
+ a registry ?
+ <teythoon> ah
+ <teythoon> I do not feel confident enough to decide this, but I agree with
+ you, it feels elegant
+ <teythoon> and it works :)
+ <teythoon> modulo the translator deadlocking if it talks to itself, any
+ thoughts on that?
+ <youpi> it is a non-threaded translator I guess?
+ <teythoon> currently yes
+ <youpi> making it threaded should fix the issue
+ <teythoon> I tried to make the mtab translator multithreaded but that
+ didn't help
+ <youpi> that's odd
+ <teythoon> maybe I did it wrong
+ <braunr> i don't find it surprising
+ <braunr> well, not that surprising :p
+ <braunr> on what lock does it block ?
+ <teythoon> as far as i can see the only difference between hello and
+ hello-mt is that it uses a different dispatcher and has lots of locking,
+ right?
+ <teythoon> braunr: I'm not sure, partly because that wreaked havoc on the
+ whole system
+ <teythoon> it just freezes
+ <teythoon> but it wasn't permanent. once i let it running and it recovered
+ <braunr> consider using a subhurd
+ <teythoon> ah right, I meant to set up one anyway, but my first attempts
+ were not successful, not sure why
+ <teythoon> anyway, is there a way to prevent this in the first place?
+ <teythoon> if one could compare ports that'd be helpful
+ <youpi> Mmm, did you try to simply compare the number?
+ <teythoon> with the bootstrap port I presume?
+ <youpi> Mmm, no, the send port and the receive port would be different
+ <youpi> no, with the receive port
+ <teythoon> ah
+ <braunr> comparing the numbers should work
+ <braunr> youpi: no they should be the same
+ <youpi> braunr: ah, then it should work yes
+ <braunr> that's why there are user ref counts
+ <youpi> ok
+ <braunr> only send-once rights have their own names
+ <teythoon> btw, I'll push my work to darnassus from now on,
+ e.g. http://darnassus.sceen.net/gitweb/?p=teythoon/hurd.git;a=shortlog;h=refs/heads/feature-mtab-translator-v3-wip
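+
+The port comparison mentioned above relies on the fact that send rights to the
+same port coalesce to a single name within a task, so a plain `==` on the port
+names is enough.  A sketch of the guard (the variable holding our own control
+port is an assumption):
+
+    #include <mach.h>
+    #include <hurd/hurd_types.h>
+
+    extern mach_port_t own_control;   /* our control port, saved at startup */
+
+    /* Skip the recursive fsys call that would otherwise deadlock when a
+       child translator turns out to be ourselves.  */
+    static int
+    child_is_self (fsys_t child)
+    {
+      return child != MACH_PORT_NULL && child == own_control;
+    }
+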
+
+
+## [[open_issues/libnetfs_passive_translators]]
+
+
+## IRC, freenode, #hurd, 2013-07-16
+
+ <teythoon> which port is the receive port of a translator? I mean, how is
+ it called in the source, there is no port in sight named receive anywhere
+ I looked.
+ <braunr> teythoon: what is the "receive port of a translator" ?
+ <teythoon> braunr: we talked yesterday about preventing the mtab deadlock
+ by comparing ports
+ <teythoon> I asked which one to use for the comparison, youpi said the
+ receive port
+ <braunr> i'm not sure what he meant
+ <braunr> it could be the receive port used for the RPC
+ <braunr> but i don't think it's exported past mig stub code
+ <teythoon> weird, I just reread it. I asked if i should use the bootstrap
+ port, and he said receive port, but it might have been addressed to you?
+ <teythoon> you were talking about send and receive ports being singletons
+ or not
+ <teythoon> umm
+ <braunr> no i answered him
+ <braunr> he was wondering if the receive port could actually be used for
+ comparison
+ <braunr> i said it can
+ <braunr> but still, i'm not sure what port
+ <braunr> if it's urgent, send him a mail
+ <teythoon> no, my pipeline is full of stuff I can do instead ;)
+ <braunr> :)
+
+
+## IRC, freenode, #hurd, 2013-07-17
+
+ <teythoon> braunr: btw, comparing ports solved the deadlock in the mtab
+ translator rather easily
+ <braunr> :)
+ <braunr> which port then ?
+ <teythoon> currently I'm stuck though, I'm not sure how to address Neal's
+ concern wrt access permission checks
+ <teythoon> I believe it's called control port
+ <braunr> ok
+ <teythoon> the one one gets from doing the handshake with the parent
+ <braunr> i thought it was the bootstrap port
+ <braunr> but i don't know the details so i may be wrong
+ <braunr> anyway
+ <teythoon> yes
+ <braunr> what is the permission problem again ?
+ <teythoon> 871u73j4zp.wl%neal@walfield.org
+ <braunr> well, you could perform a lookup on the stored path
+ <braunr> as if opening the node
+ <teythoon> if I look at any server implementation of a procedure from
+ fs.defs (say libtrivfs/file-chmod.c [bad example though, that looks wrong
+ to me]), there is permission checking being done
+ <teythoon> any server implementation of a procedure from fsys.defs lacks
+ permission checks, so I guess it's being done somewhere else
+ <braunr> i must say i'm a bit lost in this discussion
+ <braunr> i don't know :/
+ <braunr> can *you* sum up the permission problem please ?
+ <braunr> i mean here, now, in just a few words ?
+ <teythoon> ok, so I'm extending the fsys api with the get_children
+ procedure
+ <teythoon> that one should not return any children x/y if the user doing
+ the request has no read permissions on x
+ <braunr> really ?
+ <braunr> why so ?
+ <teythoon> the same way ls x would not reveal the existence of y
+ <braunr> i could also say unlike cat /proc/mounts
+ <braunr> i can see why we would want that
+ <braunr> i also can see why we could let this behaviour in place
+ <braunr> let's admit we do want it
+ <teythoon> true, but I thought this could easily be addressed
+ <braunr> what you could do is
+ <teythoon> now I'm not sure b/c I cannot even find the permission checking
+ code for any fsys_* function
+ <braunr> for each element in the list of child translators
+ <braunr> perform a lookup on the stored path on behalf of the user
+ <braunr> and add to the returned list if permission checks pass
+ <braunr> teythoon: note that i said lookup on the path, which is an fs
+ interface
+ <braunr> i assume there is no permission checking for the fsys interface
+ because it's done at the file (fs) level
+ <teythoon> i think so too, yes
+ <teythoon> sure, if I only knew who made the request in the first place
+ <teythoon> the file-* options have a convenient credential handle passed in
+ as first parameter
+ <teythoon> s/options/procedures/
+ <teythoon> surely the fsys-* procedures also have a means of retrieving
+ that information, I just don't know how
+ <braunr> mig magic
+ <braunr> teythoon: see file_t in hurd_types.defs
+ <braunr> there is the macro FILE_INTRAN which is defined in subdirectories
+ (or not)
+ <teythoon> ah, retrieving the control port requires permissions, and the
+ fsys-* operations then operate on the control port?
+ <braunr> see libdiskfs/fsmutations.h for example
+ <braunr> uh yes but that's for < braunr> i assume there is no permission
+ checking for the fsys interface because it's done at the file (fs) level
+ <braunr> i'm answering < teythoon> sure, if I only knew who made the
+ request in the first place
+ <braunr> teythoon: do we understand each other or is there still something
+ fuzzy ?
+ <teythoon> braunr: thanks for the pointers, I'll read up on that a bit
+ later
+ <braunr> teythoon: ok
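+
+A sketch of the "mig magic" referred to above, not the real libdiskfs
+definitions (see libdiskfs/fsmutations.h for those).  An INTRAN makes MIG
+translate the raw request port into a server-side object before calling the
+server function, so an fsys-level call such as fsys_get_children could see who
+is asking.  All names below are invented for illustration.
+
+    /* Server-side object carrying the looked-up port and, in a diskfs-like
+       server, the means to identify the caller.  */
+    struct control;
+    typedef struct control *control_t;
+
+    control_t my_begin_using_control_port (mach_port_t port);
+    void my_end_using_control_port (control_t obj);
+
+    /* Passed to MIG when generating the fsys server stubs, e.g. via
+       MIGSFLAGS or a mutations header:  */
+    #define FSYS_INTRAN     control_t my_begin_using_control_port (fsys_t)
+    #define FSYS_DESTRUCTOR my_end_using_control_port (control_t)
+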
+
+
+## IRC, freenode, #hurd, 2013-07-18
+
+ <teythoon> braunr: back to the permission checking problem for the
+ fsys_get_children interface
+ <teythoon> I can see how this could be easily implemented in the mtab
+ translator, it asks the translator for the list of children and then
+ checks if the user has permission to read the parent dir
+ <teythoon> but that is pointless, it has to be implemented in the
+ fsys_get_children server function
+ <braunr> yes
+ <braunr> why is it pointless ?
+ <teythoon> because one could circumvent the restriction by doing the
+ fsys_get_children call w/o the mtab translator
+ <braunr> uh no
+ <braunr> you got it wrong
+ <braunr> what i suggested is that fsys_get_children does it before
+ returning a list
+ <braunr> the problem is that the mtab translator has a different identity
+ from the users accessing it
+ <teythoon> yes, but I cannot see how to do this, b/c at this point I do not
+ have the user credentials
+ <braunr> get them
+ <teythoon> how?
+ <braunr> 16:14 < braunr> mig magic
+ <braunr> 16:15 < braunr> teythoon: see file_t in hurd_types.defs
+ <braunr> 16:16 < braunr> there is the macro FILE_INTRAN which is defined in
+ subdirectories (or not)
+ <braunr> 16:16 < braunr> see libdiskfs/fsmutations.h for example
+ <teythoon> i saw that
+ <braunr> is there a problem i don't see then ?
+ <braunr> i suppose you should define FSYS_INTRAN rather
+ <braunr> but the idea is the same
+ <teythoon> won't that change all the function signatures of the fsys-*
+ family?
+ <braunr> that's probably the only reason not to implement this feature
+ right now
+ <teythoon> then again, that change is probably easy and mechanical in nature,
+ might be an excuse to play around with coccinelle
+ <braunr> why not
+ <braunr> if you have the time
+ <teythoon> right, if this can be done, the mtab translator (if run as root)
+ could get credentials matching the users credentials to make that
+ request, right?
+ <braunr> i suppose
+ <braunr> i'm not sure it's easy to make servers do requests on behalf of
+ users on the hurd
+ <braunr> which makes me wonder if the mtab functionality shouldn't be
+ implemented in glibc eheheh ....
+ <braunr> but probably not
+ <teythoon> well, I'll try out the mig magic thing and see how painful it is
+ to fix everything ;)
+ <braunr> good luck
+ <braunr> honestly, i'm starting to think it's deviating too much from your
+ initial goal
+ <braunr> i'd be fine with a linux-like /proc/mounts
+ <braunr> with a TODO concerning permissions
+ <teythoon> ok, fine with me :)
+ <braunr> confirm it with the other mentors please
+ <braunr> we have to agree quickly on this
+ <teythoon> y?
+
+ <teythoon> braunr: I actually believe that the permission issue can be
+ addressed cleanly and unobstrusively
+ <teythoon> braunr: would you still be opposed to the get_children approach
+ if that is solved?
+ <teythoon> the filesystem is a tree and the translators "creating" that
+ tree are a more coarse version of that tree
+ <teythoon> having a method to traverse that tree seems natural to me
+ <braunr> teythoon: it is natural
+ <braunr> i'm just worried it's a bit too complicated, unnecessary, and
+ out-of-scope for the problem at hand
+ <braunr> (which is /proc/mounts, not to forget it)
+
+
+## IRC, freenode, #hurd, 2013-07-19
+
+ <teythoon> braunr: I think you could be a bit more optimistic and
+ supportive of the decentralized approach
+ <teythoon> I know the dark side has cookies and strong language and it's
+ mighty tempting
+ <teythoon> but both are bad for you :p
+
+
+## IRC, freenode, #hurd, 2013-07-22
+
+ <youpi> teythoon: AIUI, you should be able to run the mtab translator as
+ no-user (i.e. no uid)
+ <teythoon> youpi: yes, that works fine
+
+ <youpi> teythoon: so there is actually no need to define FSYS_INTRAN, doing
+ it by hand as you did is fine, right?
+ <youpi> (/me backlogs mails...)
+ <teythoon> youpi: yes, the main challenge was to figure out what mig does
+ and how the cpp is involved
+ <youpi> heh :)
+ <teythoon> my patch does exactly the same, but only for this one server
+ function
+ <teythoon> youpi: I'm confused by your mail, why are read permissions on
+ all path components necessary?
+ <braunr> teythoon: only execution normally
+ <youpi> teythoon: to avoid letting a user discover a translator running on
+ a hidden directory
+ <teythoon> braunr: exactly, and that is tested
+ <youpi> e.g. ~/home/foo is o+x, but o-r
+ <youpi> and I have a translator running on ~/home/foo/aZeRtYuyU
+ <youpi> I don't want that to show up on /proc/mounts
+ <braunr> youpi: i don't understand either: why isn't execution permission
+ enough ?
+ <teythoon> youpi: but that requires testing for read on the *last*
+ component of the *dirname* of your translator, and that is tested
+ <youpi> let me take another example :)
+ <youpi> e.g. ~/home/foo/aZeRtYuyU is o+x, but o-r
+ <youpi> and I have a translator running on ~/home/foo/aZeRtYuyU/foo
+ <youpi> ergl sorry, I meant this actually:
+ <teythoon> yes, that won't show up then in the mtab for users that are not
+ you and not root
+ <youpi> e.g. ~/home/foo is o+x, but o-r
+ <youpi> and I have a translator running on ~/home/foo/aZeRtYuyU/foo
+ <teythoon> ah
+ <teythoon> hmm, good point
+ <braunr> ?
+ * braunr still confused
+ <teythoon> well, qwfpgjlu is the secret
+ <teythoon> and that is revealed by the fsys_get_children procedure
+ <braunr> then i didn't understand the description of the call right
+ <braunr> > + /* check_access performs the same permission check as is
+ normally
+ <braunr> > + done, i.e. it checks that all but the last path components
+ are
+ <braunr> > + executable by the requesting user and that the last
+ component is
+ <braunr> > + readable. */
+ <teythoon> braunr: youpi argues that this is not enough in this case
+ <braunr> from that, it looks ok to me
+ <youpi> the function and the documentation agree, yes
+ <youpi> but that's not what we want
+ <braunr> and that's where i fail to understand
+ <youpi> again, see my example
+ <braunr> i am
+ <braunr> 10:43 < youpi> e.g. ~/home/foo is o+x, but o-r
+ <braunr> ok
+ <youpi> so the user is not supposed to find out the secret
+ <braunr> then your example isn't enough to describe what's wron
+ <braunr> g
+ <youpi> checking read permission only on ~/home/foo/aZeRtYuyU will not
+ garantee that
+ <braunr> ah
+ <braunr> i thought foo was the last component
+ <youpi> no, that's why I changed my example
+ <braunr> hum
+ <braunr> 10:43 < youpi> e.g. ~/home/foo is o+x, but o-r
+ <braunr> 10:43 < youpi> and I have a translator running on
+ ~/home/foo/aZeRtYuyU/foo
+ <braunr> i meant, the last foo
+ <teythoon> still, this is easily fixed
+ <youpi> sure
+ <youpi> just has to be :)
+ <teythoon> youpi, braunr: so do you think that this approach will work?
+ <youpi> I believe so
+ <braunr> i still don't see the problem, so don't ask me :)
+ <braunr> i've been sick all week end and hardly slept, which might explain
+ <braunr> in the example, "all but the last path components" is
+ "~/home/foo/aZeRtYuyU"
+ <braunr> right ?
+ <youpi> braunr: well, I haven't looked at the details
+ <youpi> but be it the last, or but-last doesn't change the issue
+ <youpi> if my ~/hidden is o-r,o+x
+ <youpi> and I have a translator on ~/hidden/a/b/c/d/e
+ <youpi> checking only +x on hidden is not ok
+ <braunr> but won't the call also check a b c d ?
+ <youpi> yes, but that's not what matters
+ <youpi> what matters is that hidden is o-r
+ <braunr> hm
+ <youpi> so the mtab translator is not supposed to reveal that there is an
+ "a" in there
+ <braunr> ok i'm starting to understand
+ <braunr> so r must be checked on all components too
+ <youpi> yes
+ <braunr> right
+ <youpi> to simulate the user doing ls, cd, ls, cd, etc.
+ <braunr> well, not cd
+ <braunr> ah
+ <youpi> for being able to do ls, you have to be able to do cd
+ <braunr> as an ordered list of commands
+ <braunr> ok
+ <teythoon> agreed. can you think of any more issues?
+ <braunr> so both x and r must be checked
+ <youpi> so in the end this RPC is really a shortcut for a find + fsysopts
+ script
+ <youpi> teythoon: I don't see any
+ <braunr> teythoon: i couldn't take a clear look at the patch but
+ <braunr> do you perform a lookup on all nodes ?
+ <teythoon> yes, all nodes on the path from the root to the one specified by
+ the mount point entry in the active translator list
+ <braunr> let me rephrase
+ <braunr> do you at some point do a lookup, similar to a find, on all nodes
+ of a translator ?
+ <teythoon> no
+ <braunr> good
+ <teythoon> yes
+ <braunr> iirc, neal raised that concern once
+ <teythoon> and I'll also fix settrans --recursive not to iterate over *all*
+ nodes either
+ <braunr> great
+ <braunr> :)
+ <teythoon> fsys_set_options with do_children=1 currently does that (I've
+ only looked at the diskfs version)
+
+
+## IRC, freenode, #hurd, 2013-07-27
+
+ <teythoon> youpi: ah, I just found msg_get_init_port, that should make the
+ translator detection feasible
+
+
+## IRC, freenode, #hurd, 2013-07-31
+
+ <teythoon> braunr: can I discover the sender of an rpc message?
+ <braunr> teythoon: no
+ <braunr> teythoon: what do you mean by "sender" ?
+ <teythoon> braunr: well, I'm trying to do permission checks in the
+ S_proc_mark_essential server function
+ <braunr> ok so, the sending user
+ <braunr> that should be doable
+ <teythoon> I've got a struct proc *p courtesy of a mig intran mutation and
+ a port lookup
+ <teythoon> but that is not necessarily the sender, right?
+ <braunr> proc is really the server i know the least :/
+ <braunr> there is permission checking for signals
+ <braunr> it does work
+ <braunr> you should look there
+ <teythoon> yes, there are permission checks there
+ <teythoon> but the only argument the rpc has is a mach_port_t refering to
+ an object in the proc server
+ <braunr> yes
+ <teythoon> anyone can obtain such a handle for any process, no?
+ <braunr> can you tell where it is exactly please ?
+ <braunr> i don't think so, no
+ <teythoon> what?
+ <braunr> 14:42 < teythoon> but the only argument the rpc has is a
+ mach_port_t refering to an object in the proc server
+ <teythoon> ah
+ <braunr> the code you're referring to
+ <braunr> a common way to give privileges to public objects is to provide
+ different types of rights
+ <braunr> a public (usually read-only) right
+ <braunr> and a privileged one, like host_priv which you may have seen
+ <braunr> acting on (modifying) a remote object normally requires the latter
+ <teythoon> http://paste.debian.net/20795/
+ <braunr> i thought you were referring to existing code
+ <teythoon> well, there is existing code doing permission checks the same
+ way I'm doing it there
+ <braunr> where is it please ?
+ <braunr> mgt.c ?
+ <teythoon> proc/mgt.c (S_proc_setowner) for example
+ <teythoon> yes
+ <braunr> that's different
+ <teythoon> but anyone can obtain such a reference by doing proc_pid2proc
+ <braunr> the sender is explicitely giving the new uid
+ <braunr> yes but not anyone is already an owner of the target process
+ <braunr> (although it may look like anyone has the right to clear the owner
+ oO)
+ <teythoon> see, that's what made me worry, it is not checked who's the
+ sender of the message
+ <teythoon> unless i'm missing something here
+ <teythoon> ah
+ <teythoon> I am
+ <teythoon> pid2proc returns EPERM if one is not the owner of the process in
+ question
+ <teythoon> all is well
+ <braunr> ok
+ <braunr> it still requires the caller process though
+ <teythoon> what?
+ <braunr> see check_owner
+ <braunr> the only occurrence i find in the hurd is in libps/procstat.c
+ <braunr> MGET(PSTAT_PROCESS, PSTAT_PID, proc_pid2proc (server, ps->pid,
+ &ps->process));
+ <braunr> server being the proc server AIUI
+ <teythoon> yes, most likely
+ <braunr> but pid2proc describes this first argument to be the caller
+ process
+ <teythoon> ah but it is
+ <braunr> ?
+ <teythoon> mig magic :p
+ <teythoon> MIGSFLAGS="-DPROCESS_INTRAN=pstruct_t reqport_find (process_t)"
+ \
+ <teythoon> MIGSFLAGS="-DPROCESS_INTRAN=pstruct_t reqport_find (process_t)"
+ \
+ <braunr> ah nice
+ <braunr> hum no
+ <braunr> this just looks up the proc object from a port name, which is
+ obvious
+ <braunr> what i mean is
+ <braunr> 14:53 < braunr> MGET(PSTAT_PROCESS, PSTAT_PID, proc_pid2proc
+ (server, ps->pid, &ps->process));
+ <braunr> this is done in libps
+ <braunr> which can be used by any process
+ <braunr> server is the proc server for this process (it defines the process
+ namespace)
+ <teythoon> yes, but isn't the port to the proc server different for each
+ process?
+ <braunr> no, the port is the same (the name changes only)
+ <braunr> ports are global non-first class objects
+ <teythoon> and the proc server can thus tell with the lookup which process
+ it is talking to?
+ <braunr> that's the thing
+ <braunr> from pid2proc :
+ <braunr> S_proc_pid2proc (struct proc *callerp
+ <braunr> [...]
+ <braunr> if (! check_owner (callerp, p))
+ <braunr> check_owner (struct proc *proc1, struct proc *proc2)
+ <braunr> "Returns true if PROC1 has `owner' privileges over PROC2 (and can
+ thus get its task port &c)."
+ <braunr> callerp looks like it should be the caller process
+ <braunr> but in libps, it seems to be the proc server
+ <braunr> this looks strange to me
+ <teythoon> yep, to me too, hence my confusion
+ <braunr> could be a bug that allows anyone to perform pid2proc
+ <teythoon> braunr: well, proc_pid2proc (getproc (), 1, ...) fails with
+ EPERM as expected for me
+ <braunr> ofc it does with getproc()
+ <braunr> but what forces a process to pass itself as the first argument ?
+ <teythoon> braunr: nothing, but what else would it pass there?
+ <braunr> 14:53 < braunr> MGET(PSTAT_PROCESS, PSTAT_PID, proc_pid2proc
+ (server, ps->pid, &ps->process));
+ <braunr> everyone knows the proc server
+ <braunr> ok now, that's weird
+ <braunr> teythoon: does getproc() return the proc server ?
+ <teythoon> I think so, yes
+ <teythoon> damn those distributed systems, all of their sources are so
+ distributed too
+ <braunr> i suspect there is another layer of dark glue in the way
+ <teythoon> I cannot even find getproc :/
+ <braunr> hurdports.c:GETSET (process_t, proc, PROC)
+ <braunr> that's the dark glue :p
+ <teythoon> ah, so it must be true that the ports to the proc server are
+ indeed process specific, right?
+ <braunr> ?
+ <teythoon> well, it is not one port to the proc server that everyone knows
+ <braunr> it is
+ <braunr> what makes you think it's not ?
+ <teythoon> proc_pid2proc (getproc (), 1, ...) fails with EPERM for anyone
+ not being root, but succeeds for root
+ <braunr> hm right
+ <teythoon> if getproc () were to return the same port, the proc server
+ couldn't distinguish these
+ <braunr> indeed
+ <braunr> in which case getproc() actually returns the caller's process
+ object at its proc server
+ <teythoon> yes, that is better worded
+ <braunr> teythoon: i'm not sure it's true actually :/
+ <teythoon> braunr: well, exploit or it didn't happen
+ <braunr> teythoon: getproc() apparently returns a bootstrap port
+ <braunr> we must find the code that sets this port
+ <braunr> i have a hard time doing that :/
+ <pinotree> isn't part of the stuff which is passed to a new process by
+ exec?
+ <teythoon> braunr: I know that feeling
+ <braunr> pinotree: probably
+ <braunr> still hard to find ..
+ <pinotree> search in glibc
+ <teythoon> braunr: exec/exec.c:1654 asks the proc server for the proc
+ object to use for the new process
+ <teythoon> so how much of hurd do I have to rebuild once i changed struct
+ procinfo in hurd_types.h?
+ <teythoon> oh noez, glibc uses it too :/
+
+
+## IRC, freenode, #hurd, 2013-08-01
+
+ <teythoon> I need some pointers on building the libc, specifically how to
+ point libcs build system to my modified hurd headers
+ <teythoon> nlightnfotis: hi
+ <teythoon> nlightnfotis: you rebuilt the libc, right? do you have any hurd
+ specific pointers for doing so?
+ <nlightnfotis> teythoon, I have not yet rebuilt the libc (I was planning
+ to, but I followed other courses of action) Thomas had pointed me to some
+ resources on the Hurd website. I can look them up for you
+ <nlightnfotis> teythoon, here are the instructions
+ http://darnassus.sceen.net/~hurd-web/open_issues/glibc/debian/
+ <nlightnfotis> and the eglibc snapshot is here
+ http://snapshot.debian.org/package/eglibc/
+ <teythoon> nlightnfotis: yeah, I found those. the thing is I changed a
+ struct in the hurd_types.h header, so now I want to rebuild the libc with
+ that header
+ <teythoon> and I cannot figure out how to point libcs build system to my
+ hurd headers
+ <teythoon> :/
+ <nlightnfotis> can you patch eglibc and build that one instead?
+ <pochu> teythoon: put your header in the appropriate /usr/include/ dir
+ <teythoon> pochu: is there no other way?
+ <pinotree> iirc nope
+ <pochu> teythoon: you may be able to pass some flag to configure, but I
+ don't know if that will work in this specific case
+ <teythoon> ouch >,< that explains why I haven't found one
+ <pochu> check ./configure --help, it's usually FOO_CFLAGS (so something
+ like HURD_CFLAGS maybe)
+ <pochu> but then you may need _LIBS as well depending on how you changed
+ the header... so in the end it's just easier to put the header in
+ /usr/include/
+ <braunr> teythoon: did you find the info for your libc build ?
+ <teythoon> braunr: well, i firmlinked my hurd_types.h into /usr/include/...
+ <braunr> ew
+ <braunr> i recommend building debian packages
+ <teythoon> but the build was not successful, looks unrelated to my changes
+ though
+ <teythoon> I tried that last week and the process took more than eight
+ hours and did not finish
+ <braunr> use darnassus
+ <braunr> it takes about 6 hours on it
+ <teythoon> I shall try again and skip the unused variants
+ <braunr> i also suggest you use ./debian/rules build
+ <braunr> and then interrupt the build process once you see it's building
+ object files
+ <braunr> go to the hurd-libc-i386 build dir, and use make lib others
+ <braunr> make lib builds libc, others is for companion libraries like
+ libpthread
+ <braunr> actually building libc takes less than an hour
+ <braunr> so once you validate your build this way, you know building the
+ whole debian package will succedd
+ <braunr> succeed*
+ <teythoon> so how do I get the build system to pick up my hurd_types.h?
+ <braunr> sorry if this is obvious to you, you might be more familiar with
+ debian than i am :)
+ <braunr> patch the hurd package
+ <braunr> append your own version string like +teythoon.hurd.1
+ <braunr> install it
+ <braunr> then build libc
+ <braunr> i'll reboot darnassus so you have a fresh and fast build env
+ <braunr> almost a month of uptime without any major issue :)
+ <teythoon> err, but I cannot install my hurd package on darnassus, can I? I
+ don't think that'd be wise even if it were possible
+ <braunr> teythoon: rebooted, enjoy
+ <braunr> why not ?
+ <braunr> i often do it for my own developments
+ <braunr> teythoon: screen is normally available
+ <braunr> teythoon: be aware that fakeroot-tcp is known to hang when pfinet
+ is out of ports (that's a bug)
+ <braunr> it takes more time to reach that bug since a patch that got in
+ less than a year ago, but it still happens
+ <braunr> the hurd packages are quick to build, and they should only provide
+ the new header, right ?
+ <braunr> you can include the functionality too in the packages if you're
+ confident enough
+ <teythoon> but my latest work on the killing-of-essential-processes issue
+ involves patching hurd_types.h, and that in a way that breaks the ABI,
+ hence the need to rebuild the libc (afaiui)
+ <braunr> teythoon: yes, this isn't uncommon
+ <teythoon> braunr: this is much more intrusive than anything I've done so
+ far, so I'm not so confident in my changes for now
+ <braunr> teythoon: show me the patch please
+ <teythoon> braunr: it's not split up yet, so kind of messy:
+ http://paste.debian.net/21403/
+ <braunr> teythoon: did you make sure to add RPCs at the end of defs files ?
+ <teythoon> yes, I got burned by this one on my very first attempt, you
+ pointed out that mistake
+ <braunr> :)
+ <braunr> ok
+ <braunr> you're changing struct procinfo
+ <braunr> this really breaks the abi
+ <teythoon> yes
+ <braunr> i.e. you can't do that
+ <teythoon> I cannot put it at the end b/c of that variable length array
+ <braunr> you probably should add another interface
+ <teythoon> that'd be easier, sure, but this will slow down procfs even
+ more, no?
+ <braunr> that's secondary
+ <braunr> it won't be easier, breaking the abi may break updates
+ <braunr> in which case it's impossible
+ <braunr> another way would be to use a new procinfo struct
+ <braunr> like struct procinfo2
+ <braunr> but then you need a transition step so that all users switch to
+ that new version
+ <braunr> which is the best way to deal with these issues imo, but this time
+ not the easiest :)
+ <teythoon> ok, so I'll introduce another rpc and make sure that one is
+ extensible
+ <braunr> hum no
+ <braunr> this usually involves using a version anyway
+ <teythoon> no? but it is likely that we need to save more addresses of this
+ kind in the future
+ <braunr> in which case it will be handled as an independent problem with a
+ true solution such as the one i mentioned
+ <teythoon> it could return an array of vm_address_ts with a length
+ indicating how many items were returned
+ <braunr> it's ugly
+ <braunr> the code is already confusing enough
+ <braunr> keep names around for clarity
+ <teythoon> ok, point taken
+ <braunr> really, don't mind additional RPCs when first adding new features
+ <braunr> once the interface is stable, a new and improved version becomes a
+ new development of its own
+ <braunr> you're invited to work on that after gsoc :)
+ <braunr> but during gsoc, it just seems like an unnecessary burden
+ <teythoon> ok cool, I really like that way of extending Hurd, it's really
+ easy
+ <teythoon> and feels so natural
+ <braunr> i share your concern about performances, and had a similar problem
+ when adding page cache information to gnumach
+ <braunr> in the end, i'll have to rework that again
+ <braunr> because i tried to extend it beyond what i needed
+ <teythoon> true, I see how that could happen easily
+ <braunr> the real problem is mig
+ <braunr> mig limits subsystems to 100 calls
+ <braunr> it's clearly not enough
+ <braunr> in x15, i intend to use 16 bits for subsystems and 16 bits for
+ RPCs, which should be plenty
+ <teythoon> that limit seems rather artificial, it's not a power of two
+ <braunr> yes it is
+ <teythoon> so let's fix it
+ <braunr> mach had many artificial static limits
+ <braunr> eh :D
+ <braunr> not easy
+ <braunr> replies are encoded by taking the request ID and adding 100
+ <teythoon> uh
+ <braunr> "uh" indeed
+ <teythoon> so we need an intermediate version of mig that accepts both
+ id+100 and dunno id+2^x as replies for id
+ <teythoon> or -id - 1
+ <braunr> that would completely break the abi
+ <teythoon> braunr: how so? the change would be in the *_server functions
+ and be compatible with the old id scheme
+ <braunr> how do you make sure id+2^x doesn't conflict with another id ?
+ <teythoon> oh, the id is added to the subsystem id?
+ <teythoon> to obtain a global message id?
+ <braunr> yes
+ <teythoon> ah, I see
+ <teythoon> ah, but the hurd subsystems are 1000 ids apart
+ <teythoon> so id+100 or id +500 would work
+ <braunr> we need to make sure it's true
+ <braunr> always true
+ <teythoon> so how many bits do we have for the message id in mach?
+ <teythoon> (mig?)
+ <braunr> mach shouldn't care, it's entirely a mig thing
+ <braunr> well yes and no
+ <braunr> mach defines the message header, which includes the message id
+ <braunr> see mach/message.h
+ <braunr> mach_msg_id_t msgh_id;
+ <braunr> typedef integer_t mach_msg_id_t;
+ <teythoon> well, if that is like a 32 bit integer, then allow -id-1 as
+ reply and forbid ids > 2^x / 2
+ <braunr> yes
+ <braunr> seems reasonable
+ <teythoon> that'd give us a smooth upgrade path, no?
+ <braunr> i think so
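+
+A small self-contained sketch of the numbering scheme discussed above: MIG
+routines get consecutive message ids from the base declared in the `.defs`
+file, and the reply to a request conventionally uses the request id plus
+100.  The base value below is only an example, not taken from a real Hurd
+subsystem.
+
+    #include <stdio.h>
+
+    /* Example base only; a .defs file declares something like
+       "subsystem foo 24000;" and the next Hurd subsystem starts 1000
+       ids higher.  */
+    #define SUBSYSTEM_BASE 24000
+    #define REPLY_OFFSET   100   /* MIG's conventional reply id offset */
+
+    static int
+    request_id (int routine)
+    {
+      return SUBSYSTEM_BASE + routine;
+    }
+
+    static int
+    reply_id (int routine)
+    {
+      return request_id (routine) + REPLY_OFFSET;
+    }
+
+    int
+    main (void)
+    {
+      /* Requests occupy [base, base+99] and replies [base+100, base+199],
+         which is why a subsystem cannot grow past 100 routines without
+         request and reply ids colliding.  */
+      int r = 99;
+      printf ("routine %d: request %d, reply %d\n",
+              r, request_id (r), reply_id (r));
+      return 0;
+    }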
+
+
+## IRC, freenode, #hurd, 2013-08-28
+
+ <youpi> teythoon: Mmm, your patch series does not make e.g. ext2fs provide
+ a diskfs_get_source, does it?
+
+
+## IRC, freenode, #hurd, 2013-08-29
+
+ <teythoon> youpi: that is correct
+ <youpi> teythoon: Mmm, I must be missing something then: as such the patch
+ series introduces an RPC, but only EOPNOTSUPP is ever returned in all
+ cases for now?
+ <youpi> ah
+ <youpi> /* Guess based on the last argument. */
+ <youpi> since ext2fs & such report their options with store last, it seems
+ ok indeed
+ <youpi> it still seems a bit lame not to return that information in
+ get_source
+ <teythoon> yes
+ <teythoon> well, if it had been just for me, I would not have created that
+ rpc, but only guessing was frowned upon iirc
+ <teythoon> then again, maybe this should be used and then the mtab
+ translator could skip any translators that do not provide this
+ information to filter out non-"filesystem" translators
+ <youpi> guessing is usually trap-prone, yes
+ <youpi> if it is to be used by mtab, then maybe it should be documented as
+ being used by mtab
+ <youpi> otherwise symlink would set a source, for instance
+ <youpi> while we don't really want it here
+ <teythoon> why would the symlink translator answer to such requests? it is
+ not a filesystem-like translator
+ <youpi> no, but the name & documentation of the RPC doesn't tell it's only
+ for filesystem-like translators
+ <youpi> well, the documentation does say "filesystem"
+ <youpi> but it does not clearly specify that one shouldn't implement
+ get_source if one is not a filesystem
+ <youpi> "If the concept of a source is applicable" works for a symlink
+ <youpi> that could be the same for eth-filter, etc.
+ <teythoon> right
+ <youpi> Mmm, that said it's fsys.defs
+ <youpi> not io.defs
+ <youpi> teythoon: it is the fact that we get EOPNOTSUPP (i.e. fsys
+ interface supported, just not that call), and not MIG_BAD_ID (i.e. fsys
+ interface not supported), that filters out symlink & such, right?
+ <teythoon> that's what I was thinking, but that's based on my
+ interpretation of EOPNOTSUPP of course ;)
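+
+The filtering rule just confirmed, written out as a minimal stand-alone
+sketch; the numeric value used for `MIG_BAD_ID` below is only illustrative,
+the real constant comes from MIG's error header.
+
+    #include <errno.h>
+    #include <stdio.h>
+
+    /* Illustrative stand-in for the MIG error code.  */
+    #define MIG_BAD_ID (-303)
+
+    /* What mtab could do with a translator, depending on the error its
+       get_source request came back with.  */
+    static const char *
+    classify (int err)
+    {
+      if (err == 0)
+        return "filesystem, source reported";
+      if (err == EOPNOTSUPP)
+        return "fsys interface present, but no source reported";
+      if (err == MIG_BAD_ID)
+        return "does not speak the fsys interface (e.g. symlink), skip";
+      return "other error, skip";
+    }
+
+    int
+    main (void)
+    {
+      printf ("%s\n", classify (EOPNOTSUPP));
+      printf ("%s\n", classify (MIG_BAD_ID));
+      return 0;
+    }
+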
+ <youpi> teythoon: I believe that for whatever is a bit questionable, even
+ if you put yourself on the side that people will probably agree on, the
+ discussion will still take place so we make sure it's the right side :)
+ <youpi> (re: start/end_code)
+ <teythoon> I'm not sure I follow
+ <teythoon> youpi: /proc/pid/stat seems to be used a lot:
+ http://codesearch.debian.net/search?q=%22%2Fproc%2F.*%2Fstat%22
+ <teythoon> that does not mean that start/endcode is used, but still it
+ seems like a good thing to mimic Linux closely
+ <youpi> stat is used a lot for cpu usage for instance, yes
+ <youpi> start/endcode, I really wonder who is using it
+ <youpi> using it for kernel thread detection looks weird to me :)
+ <youpi> (questionable): I mean that even if you take the time to put
+ yourself on the side that people will probably agree on, the discussion
+ will happen
+ <youpi> it has to happen so people know they agree on it
+ <youpi> I've seen that a lot in various projects (not only CS-related)
+ <teythoon> ok, I think I got it
+ <teythoon> it's to document the reasons for (not) doing something?
+ <youpi> something like this, yes
+ <youpi> even if you look right, people will try to poke holes
+ <youpi> just to make sure :)
+ <teythoon> btw, I think it's rather unusual that our storeio experiments
+ would produce such different results
+ <teythoon> you're right about the block device, no idea why I got a
+ character file there
+ <teythoon> I used settrans -ca /tmp/hello.unzipped /hurd/storeio -T
+ gunzip:file /tmp/hello
+ <teythoon> also I tried stacking the translator on /tmp/hello directly,
+ from what I've gathered that should be possible, but I failed
+ <teythoon> ftr I use the exec server with all my patches, so the unzipping
+ code has been removed from it
+ <youpi> ah, I probably still have it
+ <youpi> it shouldn't matter here, though
+ <teythoon> I agree
+ <youpi> how would you stack it?
+ <youpi> I've never had a look at that
+ <youpi> I'm not sure attaching the translator to the node is done before or
+ after the translator has a chance to open its target
+ <teythoon> right
+ <teythoon> but it could be done, if storeio used the reference to the
+ underlying node, no?
+ <youpi> yes
+ <youpi> btw, you had said at some point that you had issues with running
+ remap. Was the issue what you fixed with your patches?
+ * youpi realizes that he should have shown the remap.c source code during
+ his presentation
+ <teythoon> well, I tried to remap /servers/exec (iirc) and that failed
+ <teythoon> then again, I recently played with remap and all seemed fine
+ <teythoon> but I'm sure it has nothing to do with my patches
+ <youpi> ok
+ <teythoon> those I came up with investigating fakeroot-hurd
+ <teythoon> and I saw that this also applies to remap.sh
+ <teythoon> *while
+ <youpi> yep, they're basically the same
+ <teythoon> btw, I somehow feel settrans is being abused for chroot and
+ friends, there is no translator setting involved
+ <youpi> chroot, the command? or the settrans option?
+ <youpi> I don't understand what you are pointing at
+ <teythoon> the settrans option being used by fakeroot, remap and (most
+ likely) our chroot
+ <youpi> our chroot is just a file_reparent call
+ <youpi> fakeroot and remap do start a translator
+ <teythoon> yes, but it is not being bound to a node, which is (how I
+ understand it) what settrans does
+ <teythoon> the point being that if settrans is being invoked with --chroot,
+ it does something completely different (see the big if (chroot) {...}
+ blocks)
+ <teythoon> to a point that it might be better off in a separate command
+ <youpi> Mmm, indeed, a lot of the options don't make sense for chroot
+
+
+## IRC, freenode, #hurd, 2013-09-06
+
+ <braunr> teythoon: do you personally prefer /proc being able to implement
+ /proc/self on its own, or using the magic server to tell clients to
+ resolve those specific cases themselves ?
+ <pinotree> imho solving the "who's the sender of an rpc" could solve both
+ the SCM_CREDS implementation and the self case in procfs
+
+[[open_issues/SENDMSG_SCM_CREDS]],
+[[hurd/translator/procfs/jkoenig/discussion]], *`/proc/self`*.
+
+ <braunr> pinotree: yes
+ <braunr> but that would require servers impersonating users to some extent
+ <braunr> and this seems against the hurd philosophy
+ <pinotree> and there was also the fact that you could create a
+ fake/different port when sending an rpc
+ <braunr> to fake what ?
+ <pinotree> the sender identiy
+ <pinotree> *identity
+ <braunr> what ?
+ <braunr> you mean intermediate servers can do that
+ <teythoon> braunr: I don't know if I understand all the implications of
+ your question, but the magic server is the only hurd server that actually
+ implements fsys_forward (afaics), so why not use that?
+ <braunr> teythoon: my question was rather about the principle
+ <braunr> do people find it acceptable to entrust a server with their
+ authority or not
+ <braunr> on the hurd, it's clearly wrong
+ <braunr> but then it means you need special cases everywhere, usually
+ handled by glibc
+ <braunr> and that's something i find wrong too
+ <braunr> it restricts extensibility
+ <braunr> the user can always change its libc at runtime, but in practice,
+ it's harder to perform than simply doing it in the server
+ <teythoon> braunr: then I think I didn't get the question at all
+ <braunr> teythoon: it's kind of the same issue that you had with the mtab
+ translator
+ <braunr> about showing or not some entries the user normally doesn't have
+ access to
+ <braunr> this problem occurs when there is more than one server on the
+ execution path and the servers beyond the first one need credentials to
+ reply something meaningful
+ <braunr> the /proc/self case is a perfect one
+ <braunr> (conceptually, it's client -> procfs -> symlink)
+ <braunr> 1/ procfs tells the client it needs to handle this specially,
+ which is what the hurd does with magic
+ <braunr> 2/ procfs assumes the identity of the client and the symlink
+ translator can act as expected because of that
+ <braunr> teythoon: what way do you find better ?
+ <teythoon> braunr: by "procfs assumes the identity" you mean procfs
+ impersonating the user?
+ <braunr> yes
+ <teythoon> braunr: tbh I still do not see how this can be implemented at
+ all b/c the /proc/self symlink is not about identity (which can be
+ derived from the peropen struct initially created by fsys_getroot) but
+ the pid of the callee (which afaics is nowhere to be found)
+ <teythoon> s/callee/caller/
+ <teythoon> the one doing the rpc
+ <braunr> impersonating the user isn't only about identity
+ <braunr> actually, it's impersonating the client
+ <teythoon> yes, client is the term >,<
+ <braunr> so basically, asking proc about the properties of the process
+ being impersonated
+ <teythoon> proc o_O
+ <braunr> it's not hard, it's just a big turn in the way the system would
+ function
+ <braunr> teythoon: ?
+ <teythoon> you lost me somewhere
+ <braunr> the client is the process
+ <braunr> not the user
+ <teythoon> in order to implement /proc/self properly, one has to get the
+ process id of the process doing the /proc/self lookup, right?
+ <braunr> yes
+ <braunr> actually, we would even slice it more and have the client be a
+ thread
+ <teythoon> so how do you get to that piece of information at all?
+ <braunr> the server inherits a special port designating the client, which
+ allows it to query proc about its properties, and assume it's identity in
+ servers such as auth
+ <braunr> its*
+ <teythoon> ah, but that kind of functionality isn't there at the moment, is
+ it?
+ <braunr> it's not, by design
+ <teythoon> right, hence my confusion
+ <braunr> instead, servers use the magic translator to send a "retry with
+ special handling" message to clients
+ <teythoon> right, so the procfs could bounce that back to the libc handler
+ that of course knows its pid
+ <braunr> yes
+ <teythoon> right, so now at last I got the whole question :)
+ <braunr> :)
+ <teythoon> ugh, I just found the FS_RETRY_MAGICAL handler in the libc :-/
+ <braunr> ?
+ <braunr> why "ugh" ?
+ <teythoon> well, I'm inclined to think this is the bad kind of magic ;)
+ <braunr> do i need to look at the code to understand ?
+ <teythoon> ok, so I think option 1/ is easily implemented, option 2/ has
+ consequences that I cannot fully comprehend
+ <braunr> same for me
+ <teythoon> no, but you yourself said that you do not like that kind of
+ logic being implemented in the libc
+ <braunr> well
+ <braunr> easily
+ <braunr> i'm not so sure
+ <braunr> it's easy to code, but i assume checking for magic replies has its
+ cost
+ <teythoon> why not? the code is doing a big switch over the retryname
+ supplied by the server
+ <teythoon> we could stuff getpid() logic in there
+ <braunr> 14:50 < braunr> it's easy to code, but i assume checking for magic
+ replies has its cost
+ <teythoon> what kind of cost? computational cost?
+ <braunr> yes
+ <braunr> the big switch you mentioned
+ <braunr> run every time a client gets a reply
+ <braunr> (unless i'm mistaken)
+ <teythoon> a only for RETRY_MAGICAL replies
+ <braunr> but you need to test for it
+ <teythoon> switch (retryname[0])
+ <teythoon> {
+ <teythoon> case '/':
+ <teythoon> ...
+ <teythoon> that should compile to a jump table, so the cost of adding
+ another case should be minimal, no?
+ <braunr> yes
+ <braunr> but
+ <braunr> it's even less than that
+ <braunr> the real cost is checking for RETRY_MAGICAL
+ <braunr> 14:55 < teythoon> a only for RETRY_MAGICAL replies
+ <braunr> so it's basically a if
+ <braunr> one if, right ?
+ <teythoon> no, it's switch'ing over doretry
+ <teythoon> you should pull up the code and see for yourself. it's in
+ hurd/lookup-retry.c
+ <braunr> ok
+ <braunr> well no, that's not what i'm looking for
+ <teythoon> it's not o_O
+ <braunr> i'm looking for what triggers the call to lookup_retry
+ <braunr> teythoon: hm ok, it's for lookups only, that's decent
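+
+A rough client-side sketch of option 1/: the server answers the lookup with
+a magic retry string, and the client resolves it locally because it knows
+its own pid.  This is not the actual glibc code in `hurd/lookup-retry.c`;
+the "pid" retry string and the helper are assumptions for illustration.
+
+    #include <stdio.h>
+    #include <string.h>
+    #include <unistd.h>
+
+    /* Resolve a (hypothetical) magic retry string on the client side.
+       Returns 0 and fills RESOLVED on success, -1 if unhandled.  */
+    static int
+    handle_magic_retry (const char *retryname, char *resolved, size_t len)
+    {
+      switch (retryname[0])
+        {
+        case '/':
+          /* Absolute path: restart the lookup from the root.  */
+          snprintf (resolved, len, "%s", retryname);
+          return 0;
+
+        case 'p':
+          if (strcmp (retryname, "pid") == 0)
+            {
+              /* The client knows its own pid; the server need not.  */
+              snprintf (resolved, len, "%d", (int) getpid ());
+              return 0;
+            }
+          break;
+        }
+      return -1;
+    }
+
+    int
+    main (void)
+    {
+      char buf[32];
+      if (handle_magic_retry ("pid", buf, sizeof buf) == 0)
+        printf ("/proc/self -> /proc/%s\n", buf);
+      return 0;
+    }
+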
+ <braunr> teythoon: 1/ has the least security implications
+ <teythoon> yes
+ <braunr> it could slightly be improved with e.g. a well defined interface
+ so a user could preload a library to extend it
+ <teythoon> extend the whole magic lookup thing?
+ <braunr> yes
+ <teythoon> but that is no immediate concern, you are trying to fix
+ /proc/self, right?
+ <braunr> no, i'm thinking about the big picture for x15/propel, keeping the
+ current design or doing something else
+ <teythoon> oh, okay
+ <braunr> solving /proc/self looks actually very easy
+ <teythoon> well, I'd say this depends a lot on your trust model then
+ <teythoon> do you consider servers trusted?
+ <teythoon> (btw, will there be mutual authentication of clients/servers in
+ propel?)
+ <braunr> there were very interesting discussions about that during the
+ l4hurd project
+ <braunr> iirc, shapiro insisted that using a server without trusting it
+ (and there was specific terminology about trusting/relying/etc..) is
+ nonsense
+ <braunr> teythoon: i haven't thought too much about that yet, for now it's
+ supposed to be similar to what the hurd does
+ <teythoon> hm, then again trust is not an on/off thing imho
+ <braunr> ?
+ <teythoon> trusting someone to impersonate yourself is a very high level of
+ trust
+ <teythoon> s/is/requires/
+ <teythoon> the mobile code paper suggests that mutual authentication might
+ be a good thing, and I tend to agree
+ <braunr> i'll have to read that again
+ <braunr> teythoon: for now (well, when i have time to work on it again
+ .. :))
+ <braunr> i'm focusing on the low level stuff, in a way that won't disturb
+ such high level features
+ <braunr> teythoon: have you found something related to a thread-specific
+ port in the proc server ?
+ <braunr> hurd/process.defs:297: /* You are not expected to understand
+ this. */
+ <braunr> \o/
+ <teythoon> braunr: no, why would I (the thread related question)
+ <teythoon> braunr: yes, that comment also caught my eye :/
+ <braunr> teythoon: because you read a lot of the proc code lately
+ <braunr> so maybe your view of it is better detailed than mine
+
+
+## IRC, freenode, #hurd, 2013-09-13
+
+ * youpi crosses fingers
+ <youpi> yay, still boots
+ <youpi> teythoon: I'm getting a few spurious entries in /proc/mounts
+ <youpi> none /servers/socket/26 /hurd/pfinet interface=/dev/eth0, etc.
+ <youpi> /dev/ttyp0 /dev/ttyp0 /hurd/term name,/dev/ptyp0,type,pty-master 0
+ 0
+ <youpi> /dev/sd1 /dev/cons ext2fs
+ writable,no-atime,no-inherit-dir-group,store-type=typed 0 0
+ <youpi> fortunately mount drops most of them
+ <youpi> but not /dev/cons
+ <youpi> spurious entries in df are getting more and more common on linux
+ too anyway...
+ <youpi> ah, after a console restart, I don't have it any more
+ <youpi> I'm getting df: `/dev/cons': Operation not supported instead
+
+
+## IRC, freenode, #hurd, 2013-09-16
+
+ <youpi> teythoon: e2fsck does not seem to be seeing that a given filesystem
+ is mounted
+ <youpi> /dev/sd0s1 on /boot type ext2 (rw,no-inherit-dir-group)
+ <youpi> and still # e2fsck -C 0 /dev/sd0s1
+ <youpi> e2fsck 1.42.8 (20-Jun-2013)
+ <youpi> /dev/sd0s1 was not cleanly unmounted, check forced.
+ <youpi> (yes, both /etc/mtab and /run/mtab point to /proc/mounts)
+ <tschwinge> Yes, that is a "known" problem.
+ <youpi> tschwinge: no, it's supposed to be fixed by the mtab translator :)
+ <pinotree> youpi: glibc's paths.h points to /var/run/mtab (for us)
+ <tschwinge> youpi: Oh. But this is by means of mtab presence, and not by
+ proper locking? (Which is at least something, of course!)
+ <youpi> /var/run points to /run
+ <youpi> tschwinge: yes
+ <youpi> anyway, got to run
+
+
+## IRC, freenode, #hurd, 2013-09-20
+
+ <braunr> teythoon: how come i see three mtab translators running ?
+ <braunr> 6 now oO
+ <braunr> looks like df -h spawns a few every time
+ <teythoon> yes, weird...
+ <braunr> accessing /proc/mounts does actually
+ <braunr> teythoon: more bug fixing for you :)
+
+
+## IRC, freenode, #hurd, 2013-09-23
+
+ <teythoon> so it might be a problem with either libnetfs (which afaics has
+ never supported passive translator records before) or procfs, but tbh I
+ haven't investigated this yet
diff --git a/community/gsoc/project_ideas/object_lookups.mdwn b/community/gsoc/project_ideas/object_lookups.mdwn
index 5075f783..88ffc633 100644
--- a/community/gsoc/project_ideas/object_lookups.mdwn
+++ b/community/gsoc/project_ideas/object_lookups.mdwn
@@ -40,3 +40,32 @@ accurate measurements in a system that lacks modern profiling tools would also
be helpful.
Possible mentors: Richard Braun
+
+
+# IRC, freenode, #hurd, 2013-09-18
+
+In context of [[!message-id "20130918081345.GA13789@dalaran.sceen.net"]].
+
+ <teythoon> braunr: (wrt the gnumach HACK) funny, I was thinking about doing
+ the same for userspace servers, renaming ports to the address of the
+ associated object, saving the need for the hash table...
+ <braunr> teythoon: see
+ http://darnassus.sceen.net/~hurd-web/community/gsoc/project_ideas/object_lookups/
+ <braunr> teythoon: my idea is to allow servers to set a label per port,
+ obtained at message recv time
+ <braunr> because, yes, looking up an object twice is ridiculous
+ <braunr> you normally still want port names to be close to 0 because it
+ allows some data structure optimizations
+ <teythoon> braunr: yes, I feared that ports should normally be smallish
+ integers and contiguous at best
+ <teythoon> braunr: interesting that you say there that libihash suffers
+ from high collision rates
+ <teythoon> I've a theory to why that is, libihash doesn't do any hashing at
+ all
+ <pinotree> there are notes about that in the open_issues section of the
+ wiki
+ <teythoon> but I figured that this is probably ok for port names, as they
+ are small and contiguous
+ <neal> braunr: That's called protected payload.
+ <neal> braunr: The idea is that the kernel appends data to the message in
+ flight.
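+
+A self-contained toy model of the idea sketched above (all types here are
+stand-ins, not Mach interfaces): the server registers the object's address
+as the port's label once, a hypothetical kernel hands it back with every
+message received on that port, and the per-request hash lookup disappears.
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    typedef uintptr_t port_label_t;
+
+    /* Stand-in for a received message; LABEL is what the kernel would
+       fill in at receive time from the port's registered label.  */
+    struct message
+    {
+      int id;
+      port_label_t label;
+    };
+
+    struct object
+    {
+      const char *name;
+    };
+
+    static struct object *
+    lookup (const struct message *msg)
+    {
+      /* No hash table: the label already is the object's address.  */
+      return (struct object *) msg->label;
+    }
+
+    int
+    main (void)
+    {
+      struct object obj = { "some translator node" };
+      /* Pretend the kernel attached OBJ's address to an incoming message.  */
+      struct message msg = { 42, (port_label_t) &obj };
+      printf ("request %d is for \"%s\"\n", msg.id, lookup (&msg)->name);
+      return 0;
+    }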
diff --git a/community/gsoc/project_ideas/sound/discussion.mdwn b/community/gsoc/project_ideas/sound/discussion.mdwn
new file mode 100644
index 00000000..4a95eb62
--- /dev/null
+++ b/community/gsoc/project_ideas/sound/discussion.mdwn
@@ -0,0 +1,47 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!taglink open_issue_documentation]]: update [[sound]] page.
+
+
+# IRC, freenode, #hurd, 2013-09-01
+
+ <rekado> I'm new to the hurd but I'd love to learn enough to work on sound
+ support.
+ <rekado>
+ http://darnassus.sceen.net/~hurd-web/community/gsoc/project_ideas/sound/
+ says drivers should be ported to GNU Mach as a first step.
+ <rekado> Is this information still current or should the existing Linux
+ driver be wrapped with DDE instead?
+ <auronandace> if i recall correctly dde is currently only being used for
+ network drivers. i'm not sure how much work would be involved for sound
+ or usb
+
+
+## IRC, freenode, #hurd, 2013-09-02
+
+ <rekado> The sound support proposal
+ (http://darnassus.sceen.net/~hurd-web/community/gsoc/project_ideas/sound/)
+ recommends porting some other kernel's sound driver to GNU Mach. Is this
+ still current or should DDE be used instead?
+ <pinotree> rekado: dde or anything userspace-based is generally preferred
+ <braunr> rekado: both are about porting some other kernel's sound driver
+ <braunr> dde is preferred yes
+ <rekado> This email says that sound drivers are already partly working with
+ DDE: http://os.inf.tu-dresden.de/pipermail/l4-hackers/2009/004291.html
+ <rekado> So, should I just try to get some ALSA kernel parts to compile
+ with DDE?
+ <pinotree> well, what is missing is also the dde←→hurd glue
+ <braunr> rekado: there is also a problem with pci arbitration
+ <rekado> pinotree: I assumed DDEKit works with the hurd and we could use
+ any DDE/<other kernel> glue code with it
+ * rekado looks up pci arbitration
+ <pinotree> only for networking atm
+ <rekado> ah, I see.
diff --git a/contributing.mdwn b/contributing.mdwn
index 641de8b5..75b99bbd 100644
--- a/contributing.mdwn
+++ b/contributing.mdwn
@@ -86,6 +86,8 @@ taken the time to fix it yet, but it shouldn't be very hard. The code begins
at `hurd/pfinet/ethernet.c`, `ethernet_open()`, the `device_open` call, which
produces `edev->ether_port`. Basically, one needs to catch errors like EIEIO
when using it, and in that case re-open the device.
+See also the notes on [[hurd/translator/pfinet/implementation]], *Bugs*, *IRC,
+freenode, #hurd, 2013-09-03*.
* Add a futex kernel trap to GNU Mach. This can be useful for nicer locking
primitives, including inter-process primitives. `vm_allocate` can be used as an
example in the `gnumach` source tree for how to add a kernel trap. [[!GNU_Savannah_task 6231]]
@@ -106,6 +108,7 @@ part:1:file:/home/samy/tmp/foo`). This would be libnetfs-based.
[[GSoC proposal|community/gsoc/project_ideas/valgrind ]] about this, but the
basic port could be small.
* Use libz and libbz2 in libstore. See `hurd/libstore/unzip.c` etc., they should be replaced by mere calls to libraries, [[!GNU_Savannah_task 6990]]
+See also the discussions on [[open_issues/exec]].
* Add `/proc/$pid/maps`. `vminfo` already has this kind of information, it's a matter of making procfs do the same. [[!GNU_Savannah_bug 32770]]
diff --git a/contributing/discussion.mdwn b/contributing/discussion.mdwn
index 5a6bfd7c..11e8ac0e 100644
--- a/contributing/discussion.mdwn
+++ b/contributing/discussion.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -19,3 +19,69 @@ Invent something.
# Mailing Lists
Add link to [[mailing_lists]] to page, and suggest following these.
+
+
+# IRC, freenode, #hurd, 2013-08-05
+
+ <nalaginrut> hi guys, I'm new here. I'm a developer from Guile community,
+ and I think maybe it's a proper time to do some work to make GNU stuff
+ use Guile increasingly, but I found the wiki and docs seems a bit old,
+ and I can't find an entry from Hurd source, since there're too many
+ things. Anyone point me out?
+ <nalaginrut> thanks
+ <nlightnfotis> nalaginrut what exactly is it that you need help with?
+ <nalaginrut> I've no idea, I saw MIG and I think if it's a language I can
+ write a front-end on Guile platform. But someone suggest me write hurd
+ binding will be a good start
+ <nalaginrut> I cloned incubator which is cl-binding for hurd, but I've no
+ idea too, since there's nothing in master branch
+ <pinotree> well, fixing guile on the hurd would be a start:
+ https://buildd.debian.org/status/package.php?p=guile-2.0
+ <braunr> i won't talk about this, as my personal opinion on the matter is
+ that it's not a proper time to do it
+ <braunr> but at the same time, people should do what they're interested in
+ <braunr> so feel free to do it
+ <nalaginrut> braunr: is there any reason why it's not a proper time?
+ <braunr> nalaginrut: two words: mig sucks
+ <nalaginrut> so it'll be replaced by a new stuff?
+ <teythoon> any more reasons to have alternatives, no?
+ <braunr> sure, please do it :)
+ <braunr> actually it's more than just mig
+ <braunr> the low level internals of the hurd are almost fine, but not good
+ enough to reliably develop over it
+ <braunr> gccgo is currently proving it
+ <braunr> and such projects are good opportunities to identify and fix such
+ issues
+ <braunr> but then, if you want to work on guile, be prepared to work on a
+ lot more than just guile
+ <nalaginrut> I'm afraid I have to collect the reasons and evaluate when is
+ proper to do that, if Hurd has to be redesigned, it is not a proper time
+ ;-)
+ <braunr> it also happened with openjdk, jeremie had to fix signals (!)
+ <nalaginrut> anyway, I just want a suggestion how to start
+ <pinotree> <pinotree> well, fixing guile on the hurd would be a start:
+ https://buildd.debian.org/status/package.php?p=guile-2.0
+ <nalaginrut> ok, I'll try
+ <antrik> nalaginrut: "incubator" is a somewhat strange beast. every branch
+ in there is a completely different project. you have to find the right
+ branch for the CL bindings...
+ <nalaginrut> antrik: thanks for reply, I guess it's clisp branch?
+ <pinotree> nalaginrut:
+ http://www.gnu.org/software/hurd/source_repositories/incubator.html
+ <antrik> nalaginrut: sounds like it :-)
+ <antrik> braunr: I believe it's important to encourage work on as many
+ different levels as possible. there is no motivation for fixing low-level
+ issues unless there are some interesting high-level things relying on
+ these...
+ <braunr> antrik: i agree
+ <braunr> 11:50 < braunr> but at the same time, people should do what
+ they're interested in
+ <antrik> in fact, it's pretty much impossible to identify what we really
+ need at the lower levels unless working on high-level stuff as well...
+ <braunr> yes
+ <braunr> 11:57 < braunr> but then, if you want to work on guile, be prepared
+ to work on a lot more than just guile
+ <nalaginrut> I'm prepared to work on Hurd, is that a fair answer?
+ <antrik> nalaginrut: perfect! ;-)
+ <nalaginrut> ;-) well, easy to say, but I'll try what I can do
+ <antrik> yeah, just see how far you get. might be an interesting ride :-)
diff --git a/contributing/web_pages/news/qoth_next.mdwn b/contributing/web_pages/news/qoth_next.mdwn
index 749a42bb..935784ce 100644
--- a/contributing/web_pages/news/qoth_next.mdwn
+++ b/contributing/web_pages/news/qoth_next.mdwn
@@ -25,6 +25,17 @@ else="
<!--basic structure of a QotH entry. Adapt, reduce and add points as needed. At the end, try to make the text flow as a unified whole.-->
+IRC, freenode, #hurd, 2013-05-05, in context of libpthread conversion
+
+ <braunr> ArneBab_: which also involved fixing libpthread to correctly
+ handle timed waits and cancellation
+ <braunr> although that part was done in january this year
+
+IRC, freenode, #hurd, 2013-05-10, in context of libpthread conversion
+
+ <braunr> the "significant" changes i've done in libpthreads are actually
+ related to io_select, for Q1 2013 :)
+
This quarter [hurd hacker] [item]
Also …
diff --git a/faq/sata_disk_drives/discussion.mdwn b/faq/sata_disk_drives/discussion.mdwn
new file mode 100644
index 00000000..3f063b77
--- /dev/null
+++ b/faq/sata_disk_drives/discussion.mdwn
@@ -0,0 +1,234 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!tag open_issue_gnumach]]
+
+
+# IRC, freenode, #hurd, 2013-05-10
+
+ <braunr> what code have you used if any (or is it your own implementation)
+ ?
+ <youpi> I ended up writing my own implementation
+ <braunr> eh :)
+ <youpi> the libahci/ahci code from linux is full of linux-specific stuff
+ <youpi> it would mean working on gluing that
+ <youpi> which would rather be just done in block-dde
+ <youpi> I was told at fosdem that ahci is not actually very difficult
+ <youpi> and it isn't indeed
+ <braunr> that's why i usually encourage to use netbsd code
+
+ <braunr> any chance using ahci might speed up our virtual machines ?
+ <youpi> they are already using DMA, so probably no
+ <youpi> (with the driver I've pushed)
+ <youpi> adding support for tagged requests would permit submitting several
+ requests at a time
+ <youpi> _that_ could improve it
+ <youpi> (it would make it quite more complex too)
+ <youpi> but not so much actually
+
+ <anatoly> What about virtio? will it speed up?
+ <youpi> probably not so much
+ <youpi> because in the end it works the same
+ <youpi> the guest writes the physical address in mapped memory
+ <youpi> kvm performs the read into the pointed memory, triggers an irq
+ <youpi> the guest takes the irq, marks as done, starts the next request,
+ etc.
+ <youpi> most enhancements that virtio could bring can already be achieved
+ with ahci
+ <youpi> one can probably go further with virtio, but doing it with ahci
+ will also benefit bare hardware
+
+ <pinotree> http://en.wikipedia.org/wiki/AHCI
+ <youpi> anatoly: aka SATA
+ <anatoly> some sort of general protocol to work with any SATA drive via
+ AHCI-compatible host controller?
+ <braunr> yes
+
+ <youpi> braunr: I may be mistaken, but it does seem ahci is faster than ide
+ <youpi> possibly because the ide driver is full of hardcoded wait loops
+ <braunr> interesting :)
+ <youpi> usleeps here and there
+ <braunr> oh right
+ <braunr> i wonder how they're actually implemented
+ <youpi> so it would make sense to use that on shattrath
+ <youpi> a nasty buggy busy-loop
+ <braunr> yes but ending when ?
+ <youpi> when a given number of loops have elapsed
+ <youpi> that's where "buggy" applies :)
+ <braunr> ok so buggy implies the loop isn't correctly calibrated
+ <youpi> it isn't calibrated at all actually
+ <braunr> ew
+ <youpi> it was probably calibrated on some 486 or pentium hardware :)
+ <braunr> yeah that's what i imagined too
+ <braunr> we'll need some measurements but if it's actually true, it's even
+ better news
+
+
+## IRC, freenode, #hurd, 2013-05-11
+
+ <youpi> ah, also, worth mentioning: the AHCI driver supports up to 2TiB
+ disks
+ <youpi> (as opposed to our IDE driver which supports only LBA28, 128GiB)
+ <youpi> supporting more than 2TiB would require an RPC change, or using
+ bigger sectors
+ <youpi> (which wouldn't be a bad idea anyway)
+ <braunr> i think we should switch to uint64_t addressable vm_objects
+ <braunr> which would allow to support large files too
+ <youpi> braunr: yep
+
+
+## IRC, freenode, #hurd, 2013-05-13
+
+ <braunr> the hurd, running on vbox, with a sata controller :)
+ <braunr> hum, problem with an extended partition
+ <anatoly_> qemu/kvm doesn't have sata controller, am I right?
+ <braunr> anatoly: recent versions might
+ <braunr> http://wiki.qemu.org/Features/AHCI
+ <braunr> www.linux-kvm.org/wiki/images/7/73/2011-forum-ahci.pdf
+ <anatoly> braunr: found first link, too. Thanx for the second one
+ <braunr>
+ http://git.qemu.org/?p=qemu.git;a=blob;f=hw/ide/ahci.c;h=eab60961bd818c22cf819d85d0bd5485d3a17754;hb=HEAD
+ <braunr> looks ok in recent versions
+ <braunr> looks useful to have virtio drivers though
+ <anatoly> virtio is shown as fastest way for IO in the presentation
+ <anatoly> Hm, failed to run qemu with AHCI enabled
+ <anatoly> qemu 1.1 from debian testing
+ <anatoly> youpi how do you run qemu with AHCI enabled?
+
+
+## IRC, freenode, #hurd, 2013-05-14
+
+ <anatoly> can somebody ask youpi how he runs qemu with AHCI please?
+ <gnu_srs> I think he used vbox? Did not find any AHCI option for kvm
+ (1.1.2-+dfsg-6)
+ <anatoly> gnu_srs: http://wiki.qemu.org/ChangeLog/0.14#IDE_.2F_AHCI
+ <anatoly> but it doesn't work for me the same version of kvm
+ <braunr_> anatoly: have you checked how the debian package builds it ?
+ <anatoly> braunr: mach sees AHCI device
+ <braunr> oh :)
+ <anatoly> the problem is in last option "-device
+ ide-drive,drive=disk,bus=ahci.0"
+ <anatoly> lvm says 'invalid option'
+ <braunr> anatoly: can you give more details please ?
+ <braunr> lvm ?
+ <anatoly> s/lvm/kvm
+ <braunr> i don't understand
+ <braunr> how can mach probe an ahci drive if you can't start kvm ?
+ <anatoly> I ran it without last option
+ <braunr> then why do you want that option ?
+ <anatoly> But, actually I entered command with mistake. I retried it and it
+ works. But got "start ext2fs: ext2fs: device:hd0s2: No such device or
+ address"
+ <anatoly> Sorry for confusing
+ <braunr> that's normal
+ <braunr> it should be sd0s2
+ <bddebian2> Right because the device names are different
+ <braunr> be aware that gnumach couldn't see my extended partitions when i
+ tried that yesterday
+ <braunr> i don't know what causes the problem
+ <anatoly> Yeah, I understand, I just note about it to show that it works
+ <braunr> :)
+ <anatoly> And I was wring
+ <anatoly> s/wring/wrong
+ <braunr> is that the version in wheezy ?
+ <anatoly> I'm using testing, but it's same
+ <braunr> great
+ <braunr> the sceen.net VMs will soon use that then
+ <anatoly> I don't have extended partitions
+ <anatoly> Booted with AHCI! :-)
+ <anatoly> It freezes while downloading packages for build-essential
+ fake-root dependencies with AHCI enabled
+ <youpi> anatoly: is the IRQ of the ahci controller the same as for your
+ ethernet device? (you can see that in lspci -v)
+ <anatoly> youpi: will check
+ <anatoly> youpi both uses IRQ 111
+ <anatoly> s/111/11
+ <braunr> aw
+ <youpi> anatoly: ok, that might be why
+ <youpi> is this kvm?
+ <youpi> if so, you can set up a second ahci controller
+ <youpi> and attach devices to it
+ <youpi> so the irq is not the same
+ <youpi> basically, the issue is about dde disabling the irq
+ <youpi> during interrupt handler
+ <youpi> which conflicts with ahci driver needs
+
+
+## IRC, freenode, #hurd, 2013-05-15
+
+ <anatoly> youpi: yes, it's kvm. Will try a second ahci controller
+
+ <Slex> I read recentrly was added ahci driver, is it in userland or
+ kernel-land?
+ <gnu_srs> kernel-land the change was in gnumach
+
+
+## IRC, freenode, #hurd, 2013-05-18
+
+ <youpi> about the IRQ conflict, it's simply that both dde and the ahci
+ driver need to disable it
+ <youpi> it needs to be coherent somehow
+
+
+## IRC, freenode, #hurd, 2013-05-20
+
+ <anatoly> gnu_srs: kvm -m 1G -drive
+ id=disk,file=<path_hurd_disk_img>,if=none,cache=writeback -device
+ ahci,id=ahci-1 -device ahci,id=ahci-2 -device
+ ide-drive,drive=disk,bus=ahci-2.0
+ <anatoly> who knows what the "ich9-ahci.multifunction=on/off" parameter
+ for kvm's ahci device means?
+ <anatoly> well, I was a bit incorrect :-) The options is relative to PCI
+ multifunction devices
+ <anatoly> s/options is relative/options relates
+
+
+## IRC, freenode, #hurd, 2013-05-24
+
+ <anatoly> I don't see freezes anymore while downloading packages with AHCI
+ enabled
+ <youpi> anatoly: by fixing the shared IRQ ?
+ <anatoly> youpi: yes, I added second AHCI as you suggested
+ <youpi> ok
+ <youpi> so it's probably the shared IRQ issue
+ <anatoly> NIC and AHCI have similar IRQ when only one AHCI is enabled
+ <anatoly> according to lspci output
+ <youpi> yes
+
+
+## IRC, freenode, #hurd, 2013-06-18
+
+ <braunr> youpi: is there a simple way from hurd to check interrupts ?
+ <youpi> what do you mean by "check interrupts" ?
+ <braunr> if they're shared
+ <youpi> I still don't understand :)
+ <braunr> i'm setting up sata
+ <youpi> ah, knowing the number
+ <braunr> yes
+ <youpi> you can read that from lspci -v
+ <braunr> ok
+ <braunr> thanks
+ <braunr> hum
+ <braunr> i get set root='hd-49,msdos1' in grub.cfg when changing the
+ device.map file to point to sd0
+ <youpi> hum
+ <braunr> i wonder if it's necessary
+ <braunr> i guess i just have to tell gnumach to look for sd0, not grub
+ <braunr> youpi: the trick you mentioned was to add another controller, right
+ ?
+ <youpi> yes
+ <braunr> ok
+ <braunr> youpi: looks fine :)
+ <braunr> and yes, i left hd0 in grub's device.map
+ <braunr> although i have lots of errors on hd0s6 (/home)
+ <braunr> youpi: there must be a bug with large sizes
+ <braunr> i'll stick with ide for now, but at least setting sata with
+ libvirt was quite easy to do
+ <braunr> so we can easily switch later
diff --git a/faq/still_useful.mdwn b/faq/still_useful.mdwn
index 8d7e3f28..d08d2df7 100644
--- a/faq/still_useful.mdwn
+++ b/faq/still_useful.mdwn
@@ -68,6 +68,6 @@ various servers are designed for this sort of modification.
> drivers are actually Linux drivers running in a separate userland process.
> It also for instance provides very fine-grain virtualization support, such as
-> VPN for only one process, etc.
+> [[VPN for only one process|open_issues/virtualization/networking]], etc.
> etc. etc. The implications are really very diverse...
diff --git a/faq/system_port.mdwn b/faq/system_port.mdwn
index fc710a3e..ca96697c 100644
--- a/faq/system_port.mdwn
+++ b/faq/system_port.mdwn
@@ -47,3 +47,27 @@ Mach run as a POSIX user-space process|open_issues/mach_on_top_of_posix]], or
by implementing the [[Mach IPC|microkernel/mach/ipc]] facility (as well as
several others) as Linux kernel modules. While there have been some
experiments, no such port has been completed yet.
+
+
+# IRC, freenode, #hurd, 2013-09-05
+
+ <rah> what would be required to port the hurd to sparc?
+ <pinotree> port gnumach, write the sparc bits of mach/hurd in glibc, and
+ maybe some small parts in hurd itself too
+ <rah> what would be required to port gnumach? :-)
+ <braunr> a new arch/ directory
+ <braunr> bootstrap code
+ <braunr> pmap (mmu handling) code
+ <braunr> trap handling
+ <braunr> basic device support (timers for example)
+ <braunr> besides, sparc is a weird beast
+ <braunr> so expect to need to work around tricky issues
+ <braunr> in addition, sparc is dead
+ <rah> mmm
+ <rah> it's not totally dead
+ <rah> the T1 chips and their descendants are still in production
+ <rah> the thing is I'd like to have real hardware for the hurd
+ <rah> and if I'm going to have two machines running at once, I'd rather one
+ of them was my UltraSPARC box :-)
+ <braunr> rah: unless you work hard on it, it's unlikely you'll get it
+ <rah> braunr: of course
diff --git a/glibc/signal/signal_thread.mdwn b/glibc/signal/signal_thread.mdwn
index c6e8d69e..544d387d 100644
--- a/glibc/signal/signal_thread.mdwn
+++ b/glibc/signal/signal_thread.mdwn
@@ -13,12 +13,11 @@ invoker of `kill` to the target process. The target process' [[signal_thread]]
job is it to listen to such messages and to set up signal handler contexts in
other threads.
----
-
-[[!tag open_issue_documentation]]
# IRC, freenode, #hurd, 2011-04-20
+[[!tag open_issue_documentation]]
+
<braunr> bugs around signals are very tricky
<braunr> signals are actually the most hairy part of the hurd
<braunr> and the reason they're aynchronous is that they're handled by a
@@ -50,3 +49,43 @@ other threads.
<braunr> mach and the hurd were intended to be "hyperthreaded"
[[open_issues/multithreading]].
+
+
+# IRC, freenode, #hurd, 2013-09-17
+
+ <teythoon> I just realized that I know next to nothing about signal
+ handling on the Hurd...
+ <teythoon> especially /hurd/init's role in it
+ <teythoon> reading glibc's kill.c it does not involve /hurd/init at all, but
+ /hurd/init is full of proxying code for the msg protocol
+ <teythoon> ah, /hurd/init mitms the signal handling logic in the libc for
+ its own signals
+ <teythoon> for msg_sig_post it sends a reply immediately, and then
+ processes the signal, I wonder why that is done
+ <teythoon> also it "forwards" any signals it receives to the child it
+ spawned (like /etc/hurd/runsystem), I wonder why...
+ <teythoon> good thing the comments tell what is done, not why...
+ <teythoon> so in theory kill -HUP 1 should have been forwarded to the
+ "runsystem" process, I wonder why that does not work if that one execs
+ sysvinit
+ <braunr> teythoon: can't help you there :/
+ <teythoon> braunr: I think I sorted it out on my own, we'll see how that
+ works out in practice ;)
+ <braunr> good
+
+
+## IRC, freenode, #hurd, 2013-09-18
+
+ <teythoon> braunr: I figured out why /hurd/init does this strange thing
+ with the msg protocol
+ <teythoon> braunr: it has no signal thread
+ <teythoon> I wonder how /hurd/exec and the initial filesystem handle
+ this...
+ <teythoon> err, afaics the signal thread is created in fork(), so any
+ process not created using it (ie manually using task_create) should lack
+ the signal thread, no?
+ <teythoon> that'd be the root fs, /hurd/{exec,init,auth,proc} and
+ /etc/hurd/runsystem (the child started by /hurd/init)
+ <teythoon> but I see only /hurd/init doing something about it, namely
+ setting a msgport and handling the msg protocol, relaying any messages to
+ the signal handling logic in the glibc
diff --git a/hurd.mdwn b/hurd.mdwn
index ed88f09f..77401278 100644
--- a/hurd.mdwn
+++ b/hurd.mdwn
@@ -77,6 +77,7 @@ in the *unstable* branch of the Debian archive.
# Developer References
+* [[Coding_Style]]
* [[Rules]]
* [[Trackers]]
* [[Building]]
diff --git a/hurd/coding_style.mdwn b/hurd/coding_style.mdwn
new file mode 100644
index 00000000..cc1e3cf3
--- /dev/null
+++ b/hurd/coding_style.mdwn
@@ -0,0 +1,59 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!tag open_issue_documentation]]
+
+Some coding style comments that are specific to Hurd systems.
+
+[[!toc]]
+
+
+# Freeing Port Rights
+
+## IRC, freenode, #hurd, 2013-07-01
+
+ <teythoon> do I have to explicitly free ports in a short lived process like
+ mount?
+ <pinotree> better get into the habit of doing that anyway
+ <teythoon> how do I recognize that I have to free something? mig spec?
+ <braunr> i'd say no
+ <braunr> mig does it for you
+ <braunr> gnumach reference manual
+ <teythoon> not memory, like port rights
+ <braunr> but no, really, for short lived processes it's ok
+ <braunr> yes, port rights
+ <braunr> like memory, you don't free stuff in short lived processes :p
+ <braunr> mach does it correctly when the task is destroyed
+ <braunr> but there are two use cases for rights
+ <braunr> those you create manually
+ <braunr> and those mig creates for its own purpose
+ <braunr> ignore those used by mig, they matter only in very specific parts
+ of glibc and other very low level stuff
+ <braunr> teythoon: keep in mind that there are two flavours of resources
+ with port rights
+ <teythoon> but how do I *know* from looking at say fs.defs that I have to
+ free anything I get?
+ <braunr> rights themselves, and the user reference count per right
+ <braunr> eh, that's complicated
+ <braunr> in a complete RPC call, you must watch two things usually
+ <braunr> out of line memory
+ <braunr> and right references
+ <braunr> except otherwise mentioned, you don't have to free anything
+ <braunr> freeing passed memory should be obvious (e.g. "out" keyword on a
+ memory range)
+ <braunr> for right references, it's less obvious
+ <braunr> refer to the mach server writing guide i guess
+ <teythoon> what does the dealloc qualifier do in mig defs?
+ <braunr> basically, send rights can be created from a receive right
+ (make_send), or another send right (copy_send)
+ <braunr> it tells mig which function to call once an RPC has returned
+ <braunr> all this is described in the mach server writing guide
+ <braunr> and it's tricky
+ <braunr> quite error-prone so check with portinfo
diff --git a/hurd/console/discussion.mdwn b/hurd/console/discussion.mdwn
index f887d826..0022ec23 100644
--- a/hurd/console/discussion.mdwn
+++ b/hurd/console/discussion.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -10,6 +10,8 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_documentation]]
+[[!toc]]
+
# IRC, OFTC, #debian-hurd, 2012-09-24
@@ -40,3 +42,9 @@ License|/fdl]]."]]"""]]
hacking?
<allesa> to fix that is…
<youpi> some hacking yes
+
+
+# IRC, freenode, #hurd, 2013-07-10
+
+ <pinotree> http://xkbcommon.org/ sounds interesting for our console
+ translator
diff --git a/hurd/debugging.mdwn b/hurd/debugging.mdwn
index f4b5eba5..ae9b7bef 100644
--- a/hurd/debugging.mdwn
+++ b/hurd/debugging.mdwn
@@ -26,3 +26,30 @@ License|/fdl]]."]]"""]]
* [[glibc]]
* [[translator]]s
* [[trap_in_the_kernel]]
+
+
+# IRC, freenode, #hurd, 2013-06-30
+
+ <hacklu> braunr: I don't understand your question totally, but I want to
+ know how do you do this inspecting? <braunr> i have a small test program
+ that creates a thread, and inspect its state before any thread dies
+ <braunr> i use portinfo
+ <braunr> and rpctrace
+ <braunr> (there is also vminfo but you're not likely to need it for what
+ you're doing right now)
+ <hacklu> I have used rpctrace before, but portinfo, I will try it.
+ <hacklu> is portinfo show a process's all port use log?
+ <braunr> not log
+ <braunr> current state
+ <hacklu> dump the port name space?
+ <braunr> yes
+ <hacklu> I found some names are not continuous. how this come out?
+ <braunr> continuous ?
+ <hacklu> 101:send 103:send
+ <hacklu> missing 102
+ <braunr> some are freed
+ <braunr> a lot actually
+ <braunr> every RPC needs a reply port
+ <braunr> a temporary receive right to get replies from servers
+ <hacklu> so we can reuse the name which are freed before
+ <braunr> of course
diff --git a/hurd/debugging/rpctrace.mdwn b/hurd/debugging/rpctrace.mdwn
index a5c1a6e9..d62a4387 100644
--- a/hurd/debugging/rpctrace.mdwn
+++ b/hurd/debugging/rpctrace.mdwn
@@ -16,6 +16,17 @@ doing.
See `rpctrace --help` about how to use it.
+# IRC, freenode, #hurd, 2013-07-29
+
+ <teythoon> about rpctrace, it poses as the kernel for its children, parses
+ and relays any messages sent over the children's message port, right?
+ <braunr> teythoon: rpctrace doesn't "poses as the kernel"
+ <braunr> well, it's close enough
+ <teythoon> but it intercepts messages sent by its children by handing them
+ a message port different from the one provided by the kernel, doesn't it?
+ <braunr> yes
+
+
# Issues and Patches
[[!tag open_issue_hurd]]
@@ -182,6 +193,38 @@ See `rpctrace --help` about how to use it.
<youpi> uhu, there's a TODO just above that assertion :)
+* IRC, freenode, #hurd, 2013-07-05
+
+ <pinotree> wish: make rpctrace decode the results of io_stat rpcs
+
+* IRC, freenode, #hurd, 2013-07-29
+
+ <teythoon> imho rpctrace is kind of a mess right now :-/ we should move the
+ parsing code to a library
+ <teythoon> that would also be useful for valgrind, it should have to do
+ basically the same
+
+* IRC, freenode, #hurd, 2013-07-29
+
+ <teythoon> and I tried to rpctrace a subhurd, but rpctrace died on an
+ assertion failure, some msg had an unexpected type or something
+ <braunr> rpctrace dies on select
+ <braunr> and guess what, the boot tool does call select on the console it
+ emulates
+ <teythoon> that's a shame, that'd be really useful for me
+ <braunr> it might not be hard to fix
+ <braunr> but i've never looked into it :/
+ <braunr> i only saw that rpctrace expects the common RPC message types
+ <braunr> and select is all but a common RPC
+ <braunr> so the type of the messages involved is slightly different
+ <braunr> and the assertion chokes on that
+ <teythoon> rpctrace.c is huge and hand written, it'd be nice if the parser
+ was created from the procedure definitions
+ <teythoon> and thinking of that, mig does exactly that, one would only need
+ some glue code
+ <braunr> select is partially hand written
+ <braunr> but it's a special case so that's ok
+
# See Also
diff --git a/hurd/libfuse.mdwn b/hurd/libfuse.mdwn
index 45ff97ec..78e96022 100644
--- a/hurd/libfuse.mdwn
+++ b/hurd/libfuse.mdwn
@@ -29,6 +29,26 @@ etc.
* File I/O is quite slow.
+## IRC, freenode, #hurd, 2013-05-31
+
+ <zacts> well the reason I'm asking, is I'm wonder about the eventual
+ possibility of zfs on hurd
+ <pinotree> no, zfs surely not
+ <zacts> *wondering
+ <zacts> pinotree: would that be because of license incompatibilities, or
+ technical reasons?
+ <pinotree> the latter
+ <taylanub> It's just a matter of someone sitting down and implementing it
+ though, not ?
+ <pinotree> possibly
+ <braunr> zacts: the main problem seems to be the interactions between the
+ fuse file system and virtual memory (including caching)
+ <braunr> something the hurd doesn't excel at
+ <braunr> it *may* be possible to find existing userspace implementations
+ that don't use the system cache (e.g. implement their own)
+ <braunr> and they could almost readily use our libfuse version
+
+
# Source
[[source_repositories/incubator]], libfuse/master.
diff --git a/hurd/libstore.mdwn b/hurd/libstore.mdwn
index 8eac39fe..b2e7f7a9 100644
--- a/hurd/libstore.mdwn
+++ b/hurd/libstore.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2007, 2008, 2009 Free Software Foundation,
+[[!meta copyright="Copyright © 2007, 2008, 2009, 2013 Free Software Foundation,
Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
@@ -6,8 +6,8 @@ id="license" text="Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation License, Version 1.2 or
any later version published by the Free Software Foundation; with no Invariant
Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
-is included in the section entitled
-[[GNU Free Documentation License|/fdl]]."]]"""]]
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
`libstore` is used to provide a generic interface to access data (read/write)
on backing stores.
@@ -15,6 +15,8 @@ on backing stores.
It more than just a thin layer between [[GNU Mach|microkernel/mach/gnumach]]
devices (`hd0` for example) and the device node below `/dev/`...
+[[!toc]]
+
# Available Stores
@@ -34,3 +36,32 @@ can be found.
pages="hurd/libstore/examples/* and !*/discussion"
show=0
feeds=no]]
+
+
+# Open Issues
+
+## IRC, freenode, #hurd, 2013-07-29
+
+[[!tag open_issue_documentation open_issue_hurd]]
+
+ <teythoon> and I read hammy's paper about mobile code, is it true that the
+ store code is loaded into the client? who is the server and who is the
+ client in this context?
+ <braunr> teythoon: "store code" ?
+ <teythoon> libstore
+ <braunr> the hurd libstore ?
+ <teythoon> yes
+ <braunr> hum, what paper ?
+ <teythoon> braunr:
+ http://users.student.lth.se/cs07fh9/2009-hammar-hurd-mobility.pdf
+ <braunr> how nice
+ <tschwinge> braunr: http://www.gnu.org/software/hurd/news/2010-01-31.html
+ <teythoon> it raises an important point btw, the authentication done by
+ processes on the Hurd is one sided, only the client authenticates at the
+ server
+ <braunr> yes
+ <tschwinge> It'S also mentioned in
+ http://www.gnu.org/software/hurd/hurd/documentation.html -- but of
+ course, any results he got from his work really should be integrated more
+ properly into the existing body of documents.
+ <tschwinge> As with so many other documents/discussions/etc. ;-|
diff --git a/hurd/libstore/part.mdwn b/hurd/libstore/part.mdwn
index 5260d05d..29ef9072 100644
--- a/hurd/libstore/part.mdwn
+++ b/hurd/libstore/part.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2010, 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2010, 2012, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -28,6 +29,132 @@ A similar problem is described in
[[community/gsoc/project_ideas/unionfs_boot]], and needs to be implemented.
-# TODO
+# Open Issues
-How to use, etc.
+## Documentation
+
+[[!tag open_issue_documentation]]
+
+## [[open_issues/hurd_build_without_parted]]
+
+## IRC, freenode, #hurd, 2013-09-21
+
+ <phcoder> Hello, guys. Is there a way to know where partition starts on
+ hurd. E.g. given hd0s1 get "2048 sectors"
+ <youpi> yes, it's the storeinfo RPC
+ <youpi> let me find you a pointer
+ <phcoder> in GRUB 2 files for determining device relations are a mess of
+ #if's. I try to split it into logical files and make common logic
+ uniform. Current Hurd's logic is completely different and, actually,
+ wrong. Same logic is used by Mac OS X part ...
+ <youpi> phcoder: Mmm, I guess you never got the userland-part.patch
+ upstream
+ <youpi> ah, yes ,you did
+ <youpi> I mean the find_hurd_root_device function
+ <youpi> grub was previously using file_get_storage_info
+ <phcoder> youpi: find_hurd_root_device/file_get_storage_info is about
+ translating / -> /dev/hd0s1. Current problem is in step hd0s1 ->
+ hd0,msdos1
+ <youpi> yes, but iirc file_get_storage_info might work for hd0s1 itself
+ <phcoder> I see, let me try this
+ <phcoder> youpi: file_get_storage gives offset=0 size=partition size
+ <youpi> (file_get_storage) damn
+ <phcoder> and name=hd0s1
+ <youpi> ah, that might be because we're still using in-kernel partition
+ table, instead of the parted partition table
+ <phcoder> looks like file_get_storage would be useful to get block size
+ though
+ <phcoder> youpi: is parted already used in some cases? Any reliable way to
+ check for it? Any way to access kernel partition map? Ioctl? RPC to
+ kernel?
+ <youpi> the parted table is only enabled in the debian installer for
+ now. You can set up one for yourself by running e.g. settrans -c
+ /tmp/myhd0s1 /hurd/storeio -T typed part:1:device:hd0
+ <youpi> I don't think there is any ioctl/RPC to get the kernel partition
+ table
+ <phcoder> youpi: is it using Linux partition code with some glue?
+ <youpi> phcoder: the kernel partition table, yes
+ <phcoder> youpi: that's bad. it's probably one of the least consistent
+ numbering schemes. It would imply that it only worked because only
+ simplest cases were ever tested
+ <youpi> I know
+ <youpi> that's why we want to migrate to the parted-based partition table
+ support
+ <youpi> (which also brings us much better support than the old linux2.0
+ code :) )
+ <phcoder> youpi: I've looked into code and must say that I dislike what I
+ see: partitions handled in ide/ahci/sd/...
+ <youpi> phcoder: which code?
+ <phcoder> youpi: gnumach
+ <youpi> sure, that's not what we want in the end
+ <phcoder> grep -r start_sect
+ <youpi> it's just the legacy linux way of doing partition support
+ <phcoder> Well Linux at least gives a meaningful ioctl
+ <phcoder> couldn't find any hint of it in gnumach
+ <youpi> we didn't bother to add one since the parted way is supposed to be
+ what we have in the end
+ <phcoder> youpi: I can't make our code follow sth that might be the case in
+ the future
+ <youpi> why not?
+ <youpi> that's the way we will go
+ <youpi> it's not just hypothetic
+ <youpi> we just can't continue maintaining disk drivers in the kernel
+ <youpi> so it won't be in the kernel
+ <phcoder> youpi: if I do then GRUB won't work on current GNU/Hurd anymore
+ <youpi> can't you also keep the old code?
+ <youpi> as a fallback when the proper way does not work (yet)
+ <phcoder> More hairs... :(
+ <phcoder> How do I check for it? offset == 0 isn't proper as partitions may
+ start at 0
+ <phcoder> but checking than name still refers to partition is probably the
+ right way
+ <youpi> I don't see what you mean
+ <youpi> (about name)
+ <phcoder> youpi: I mean that we need a way to know that current code is
+ used and not future parted-based code
+ <youpi> phcoder: I understand that for the offset ==0 thing
+ <youpi> but I didn't understand the phrase you wrote just after that
+ <phcoder> youpi: file_get_storage gives back a name. If this name is the
+ same as the partition we requested in the first place then it's current
+ code
+ <youpi> ah, ok
+ <youpi> yes, if the name is the same, it means it's not actually a
+ partition
+ <phcoder> youpi: current gnumach code makes fake devices out of partitions
+ <youpi> yes
+ <phcoder> youpi: with settrans command you told, I get num_ints = 0
+ <youpi> phcoder: odd, I do get information, e.g.:
+ <youpi> hurd:/tmp# settrans -c /tmp/mysd0s1 /hurd/storeio -T typed
+ part:1:device:sd0
+ <youpi> hurd:/tmp# storeinfo mysd0s1
+ <youpi> device (0x200): sd0: 512: 83905: 42959360: 63+83905
+ <phcoder> storeinfo: myhd0s1: Operation not supported
+ <youpi> do you actually have an hd0 device?
+ <phcoder> yes
+ <phcoder> youpi: I typed parted instead of part
+ <phcoder> Now it works
+ <youpi> good :)
+ <phcoder> youpi: what is expected timeline on migration to part interface?
+ <youpi> there's no real timeline
+ <youpi> like everything, it'll happen when somebody actually looks at how
+ to achieve it
+ <youpi> perhaps it'll be easy, perhaps not. IIRC there is still an issue
+ with the swapper
+ <phcoder> youpi: sounds like we're stuck will fallback code for at least
+ couple of years
+ <youpi> possibly, entirely depends on people taking the task
+ <youpi> if that becomes really pressing at some point, I'll have to do it,
+ but of course, I can not magically do everything in a glimpse
+ <phcoder> youpi: it's not pressing but just be aware that unusual
+ partitioning is likely to fail. Probably not huge issue. As to its place
+ in our code it's not ideal but it's not the only case of suboptimal
+ construction for specific systems (what we had to do because of Linux
+ caching is terrifying). I'm not going to make hurd code a scapegot of
+ more generic problem
+ <phcoder> youpi: and since we very rarely drop support this code is
+ probably stuck for good
+ <youpi> as long as it's not used whenever we get to move to parted-based
+ partitioning, it's not too bad
+ <phcoder> youpi: and Mac OS X/Darwin case is even worse. Apparently they
+ deprecated their *BSD functions (which probably don't work since they
+ don't use BSD labels) without giving any replacement.
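+
+To condense the recipe from the log above into a sketch (the device `sd0`,
+the partition number `1` and the node `/tmp/mysd0s1` are just the example
+values used in the discussion):
+
+    # expose the first partition of sd0 through the parted-based "part" store
+    settrans -c /tmp/mysd0s1 /hurd/storeio -T typed part:1:device:sd0
+    # print the store's class, block size and the runs backing it
+    storeinfo /tmp/mysd0s1
+    # -> device (0x200): sd0: 512: 83905: 42959360: 63+83905
+
+The last field (`63+83905`) is the run of 512-byte blocks backing the store,
+i.e. the partition's start offset and length within `sd0`. A store set up by
+the in-kernel partitioning code instead reports offset `0` and a name equal
+to the partition device itself, which is how a client such as GRUB can tell
+the two schemes apart.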
diff --git a/hurd/running/debian/dhcp.mdwn b/hurd/running/debian/dhcp.mdwn
index afa46799..849ff382 100644
--- a/hurd/running/debian/dhcp.mdwn
+++ b/hurd/running/debian/dhcp.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2011, 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2012, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -29,3 +30,97 @@ scripts, but has its own `/libexec/rc` script -- which integrates scripts from
* [[!debbug 616290]]
* [[Proper Hurdy DHCP support|hurd/translator/pfinet/dhcp]]
+
+ * [[!message-id desc="dhclient aborting with a stack smashing error"
+ "874ngfvwn4.fsf@kepler.schwinge.homeip.net"]]
+
+ IRC, freenode, #hurd, 2013-08-21:
+
+ <teythoon> yay, I fixed the path of the dhcp leases file...
+ <teythoon> ... and now dhclient dies of a buffer overflow
+ <teythoon> fortunately the fix is rather simple, anyone who cares about
+ the security of his box just has to stop using isc software
+ <teythoon> the code is full of stuff like char foo[100]; /* surely
+ that's enough */
+ <pinotree> note that our version of isc-dchp (the one in ports) is
+ older than the latest one available in unstable (which is still older
+ than the latest upstream releases)
+ <teythoon> so?
+ <pinotree> dunno, might have been fixed or not
+ <teythoon> ^^ yeah sure
+ <gnu_srs> A lot of software has these limitations and PATH_MAX,
+ MAXPATHLEN issues :(
+ <pinotree> having a limitation is not a problem per-se
+ <teythoon> no, only software written in c has these kind of problems
+ <pinotree> the problem is not checking whether the limits are hit
+ <teythoon> well, looking at the source of isc-dhcp my time is better
+ spent making another dhcp client work on hurd
+ <teythoon> also reading up on bug #616290 does make me want to avoid
+ touching it ever
+ <braunr> hehe
+ <gnu_srs> teythoon: somebody was offering an alternative to the isc
+ dhcpclient, but I think it was rejected by Samuel?
+ <teythoon> why would he do that?
+ <braunr> probably for compliance
+ <gnu_srs> He probably thought they would release a new version soon, is
+ 4.3.0 out yet?
+ <teythoon> well, as soon as my fixes for ifupdown go in, dhclient will
+ start crashing
+ <teythoon> no, there is no new version released
+ <teythoon> no major one that is
+ <teythoon> 4.2.5 is out
+ <gnu_srs> can't you just increase the buffer size, where is the problem
+ exactly?
+ <teythoon> I have no idea
+ <gnu_srs> The Hurd patches are not in 4.2.5, they were promised for
+ 4.3.0a1.
+ <gnu_srs> Still the buffer overflow problem might be present in 4.2.5
+ if patched to build on Hurd.
+ <braunr> there, darnassus now has a fully featured git/gitweb service
+ <teythoon> :)
+ <teythoon> btw, I managed to reproduce the crash reliably
+ <teythoon> rm /var/lib/dhcp/*; dhclient -v /dev/eth0 ... *boom*
+ <teythoon> ditch the -v, everything works, and now that there is a
+ lease file, you can add the -v again and it works
+ <braunr> ew :)
+ <teythoon> and what has dhclient.c to say for its defense?
+ <teythoon> log_info("%s", "");
+ <teythoon> hm, not much :/
+
+ IRC, freenode, #hurd, 2013-08-22:
+
+ <teythoon> uh, the isc-dhcp situation is a huge pita, the source on
+ -ports does not compile anymore :/
+
+ IRC, freenode, #hurd, 2013-08-23:
+
+ <gnu_srs> teythoon: Was it the slash in the network interface names
+ that caused the buffer overflow in dhclient?
+ <teythoon> gnu_srs: no, previously no dhcp leases file was written and
+ everything was fine
+ <pinotree> teythoon: did you really develop your patch against that old
+ version of ifupdown?
+ <teythoon> gnu_srs: now it is written, and for some reason dhclient
+ crashes *iff* -v is given *and* there is no previous lease file
+ <teythoon> pinotree: no, I did not. that was only reportbug including
+ information from my desktop machine without asking me
+ <teythoon> but when I first looked at ifupdown it was still a 6000
+ lines noweb file >,<
+ <teythoon> that was fun
+ <pinotree> which version is it against?
+ <teythoon> hg tip
+
+ IRC, freenode, #hurd, 2013-08-30:
+
+ <tschwinge> teythoon: I understand correctly that you found that
+ id:"874ngfvwn4.fsf@kepler.schwinge.homeip.net" in fact was really
+ "just" a buffer overflow in the dhclient code?
+ <teythoon> tschwinge: ah, most interesting, I didn't realize that you
+ stumbled across this as well
+ <teythoon> to be honest I don't know what's going on there, I only
+ observed what I wrote in my report
+ <teythoon> for me it started crashing once the lease file was actually
+ a valid path (i.e. not to a non-existing directory b/c of the slashes
+ in /dev/eth0)
+ <teythoon> I tried to rebuild the package served on debian-ports, but
+ that failed
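+
+A compact sketch of the reproduction recipe from the log above (device and
+lease-file path as used in the discussion; run as root):
+
+    # remove any existing lease file, then request verbose output
+    rm /var/lib/dhcp/*
+    dhclient -v /dev/eth0
+    # -> aborts (stack smashing); without -v, or once a lease file
+    #    exists again, the same command works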
diff --git a/hurd/subhurd.mdwn b/hurd/subhurd.mdwn
index df708499..55927fdd 100644
--- a/hurd/subhurd.mdwn
+++ b/hurd/subhurd.mdwn
@@ -42,6 +42,22 @@ set up another Hurd on a different partition, without ever rebooting. (You can
run the `native-install` step from a chroot or already in a subhurd.)
+### IRC, freenode, #hurd, 2013-09-15
+
+ <gnu_srs> Never dared to try a subhurd, any link to the howto?
+ <teythoon> gnu_srs: I followed
+ http://www.gnu.org/software/hurd/hurd/subhurd.html though using crosshurd
+ didn't work for me, I just used debootstrap
+ <teythoon> gnu_srs: and you need a separate filesystem translator (i.e. not
+ /) for that
+ <teythoon> the easiest way is to add another virtual disk to you qemu setup
+ <braunr> use the qemu image directly
+ <braunr> simplest way to set up a subhurd
+ <braunr> just change fstab from the host before the first boot to avoid
+ making the subhurd use the same hd0 drive as the host
+ <teythoon> braunr: nice idea :)
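+
+A rough sketch of the disk-image approach suggested above (all names are
+examples; the image file, the mount point and the `hd1s1` device depend on
+your setup):
+
+    # attach the downloaded image as a second disk, e.g.:
+    #   qemu ... -hda main.img -hdb debian-hurd.img
+    # then, inside the main Hurd:
+    settrans -ca /mnt /hurd/ext2fs /dev/hd1s1   # mount the subhurd's root
+    nano /mnt/etc/fstab              # must not reference the host's hd0
+    fsysopts /mnt --readonly         # remount it read-only
+    # finally boot it as described below, e.g.: boot servers.boot /dev/hd1s1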
+
+
## Booting
To boot the subhurd, you need a boot script. For historical reasons, usually
@@ -86,7 +102,8 @@ it!
practice [that doesn't work at the
moment](http://savannah.gnu.org/bugs/?17341).)
-You can provide the subhurd with a network card by passing a -f option to `boot`.
+You can provide the subhurd with a network card by passing a `-f` option to
+`boot`.
Now the subhurd should boot just like a normal Hurd started directly from GRUB,
finally presenting a login prompt. The `boot` program serves as proxy for the
@@ -120,6 +137,362 @@ look at the number of threads (e.g. using `ps -l`), as many servers have very
characteristic thread counts.
+### IRC, freenode, #hurd, 2013-08-09
+
+ <teythoon> btw, is there a way to get dde-based networking into a subhurd?
+ <teythoon> the wiki instructions look like they're for the mach driver
+ <teythoon> and starting the dde translator inside the subhurd does not work
+ for me
+ <teythoon> that's probably a good thing though
+ <youpi> the netdde process will need privileged access to mach
+ <youpi> for hardware access
+ <braunr> you can't easily use netdde from a subhurd, unless with a
+ different nic
+ <braunr> i usually rebuild mach with in kernel devices so both the main and
+ subhurd can share on nic
+ <braunr> one*
+ <youpi> could a port to netdde perhaps forwarded to the subhurd?
+ <braunr> zengh da wrote the eth-multiplexer for that iirc
+ <youpi> it's a matter of making it appear as an eth0 device on the master
+ port aiui
+ <braunr> zheng*
+ <teythoon> yes, I looked at that
+ <teythoon> what is the master port?
+ <youpi> on a plain hurd system it's the port that privileged processes can
+ use to access mach devices
+ <youpi> in a subhurd, it's the same for the subhurd, to access some devices
+ that you choose to give access to
+ <braunr> its real name is the "device master port"
+ <teythoon> ah yes
+
+
+#### IRC, freenode, #hurd, 2013-08-10
+
+ <antrik> teythoon: use eth-multiplexer to use the NIC within a
+ subhurd. that's exactly what it was created for.
+ <antrik> I don't remember whether it's even possible to share a "raw"
+ netdde device... I don't think I ever tried that; and I don't remember
+ enough of the theory to tell whether it should be possible
+ <antrik> but I really don't see the reason to, when eth-multiplexer is
+ available
+ <antrik> (IMHO running an eth-multiplexer on top of netdde should be the
+ default setup in fact)
+ <antrik> as for actually passing on the device, that should be perfectly
+ possible with zhengda's modified subhurd... but I don't remember whether
+ that was ever merged upstream
+ <antrik> (you will definitely need that for using netdde in a subhurd,
+ regardless whether through eth-multiplexer or directly)
+
+
+#### IRC, freenode, #hurd, 2013-09-15
+
+ <teythoon> I wonder if we can modify the boot program so that it passes
+ ports from the mother hurd to the subhurd
+ <teythoon> so that we could pass in a port to the eth-multiplexer
+ <teythoon> or use like /hurd/remap as the root translator for the subhurd
+ <braunr> eth-multiplexer was created exactly for that iirc,
+ <braunr> so it's probably already done somewhere
+
+
+#### IRC, freenode, #hurd, 2013-09-16
+
+ <gnu_srs> braunr: regarding subhurd did you mean to install
+ sthibault/hurd-i386/debian-hurd.img.tar.gz
+ <gnu_srs> on a separate partition and booting using the instructions for
+ subhurds on the web.
+ <braunr> gnu_srs: yes
+ <braunr> be careful that the subhurd doesn't use the same partition as the
+ main hurd, that's all
+ <gnu_srs> what about changing fstab?
+ <braunr> 12:17 < braunr> be careful that the subhurd doesn't use the same
+ partition as the main hurd, that's all
+ <teythoon> gnu_srs: yes, you need to change the fstab
+ <teythoon> currently it is used for fscking stuff, so if it points to your
+ main partition it will cause severe corruption
+ <teythoon> gnu_srs: you also have to specify the right partition in the
+ servers.boot file
+ <gnu_srs> fstab of the subhurd image?
+ <teythoon> yes
+ <gnu_srs> how to unpack the .img file (just to be sure)?
+ <teythoon> gnu_srs: you don't need to, just use the img file as secondary
+ hard disk image
+ <gnu_srs> Then how should I be able to change fstab of the image?
+ <teythoon> boot your hurd box, mount the partition and change it
+ <gnu_srs> I missed something here: on my partition /my_chroot I have have
+ the file debian-hurd-20130504.img
+ <teythoon> gnu_srs: ah, you copied it to the partition, braunr meant to use
+ it as the secondary disk, e.g. qemu ... -hdb debian-hurd-20130504.img ...
+ <gnu_srs> That is the same as installing another cd image, where does the
+ subhurd come into play?
+ <teythoon> mount the partition on the secondary hd, fix the fstab there,
+ mount it r/o, get the servers.boot file from the wiki, modify it so that
+ it points to the right partition, execute boot servers.boot /dev/<your
+ partition>, probably /dev/hd1s1
+ <gnu_srs> BTW: unpacking was problematic: tar: debian-hurd-20130504.img:
+ Cannot seek to 2147696640 (2G limitations)
+ <teythoon> I wonder why you did this on your hurd system in the first
+ place...
+ <gnu_srs> I thought I could use that partition, /my_chroot as a chroot
+ place. So it won't work for subhurds?
+ <teythoon> well, there are several ways to setup a subhurd. one is to
+ already have a spare partition for that and use crosshurd or as I did
+ debootstrap to install a debian system there
+ <teythoon> braunr suggested an even easier way, download the .img file and
+ use it as secondary hard disk
+ <teythoon> you ended up doing kind of both
+ <gnu_srs> I tried once with debootstrap and that created a disaster...
+ <teythoon> how so?
+ <gnu_srs> The install errored out, and the whole filesystem (including /)
+ was left in a broken state. Maybe I tried
+ <gnu_srs> that without using a separate partition. Don't remember any
+ longer. So you say it's safe now?
+ <teythoon> I used it successfully to setup my subhurd
+ <gnu_srs> and you have your subhurd in a separate partition, installed from
+ there too, as root?
+ <gnu_srs> the web page only mentions crosshurd, and that failed for you?
+ <teythoon> yes, having a separate partition is (currently) necessary to run
+ a subhurd
+ <teythoon> yes, I used debootstrap as root, afaics that is necessary
+ <teythoon> and yes, as I said the other day, I tried crosshurd first and it
+ failed
+ <teythoon> then again, I fail to see any reason to use crosshurd these days
+ <teythoon> it's only a wrapper around debootstrap anyway, using it with
+ --foreign and fixing up stuff later
+ <teythoon> one has more control over the process if one uses debootstrap
+ directly
+ <gnu_srs> I still don't dare to do it yet. I'll create another image using
+ netinst with a separate partition and try out first.
+ <gnu_srs> When installing a new image using netinst.iso (2013-06-30) and
+ rebooting /proc does not get mounted?
+ <teythoon> gnu_srs: is that a statement or a question?
+ <gnu_srs> A statement.
+ <teythoon> it's not customary to end statements with question marks ;)
+ <gnu_srs> s/mounted?/mounted, why?/
+ <teythoon> well, you seem to be the last person to perform such an
+ installation, so you are in the perfect position to answer this question.
+ <gnu_srs> cat /var/log/dmesg?
+ <gnu_srs> On other images I have: fsysopts /proc; /hurd/procfs
+ --clk-tck=100 --stat-mode=444 --fake-self=1
+ <youpi> gnu_srs: no, check the installation log
+ <youpi> gnu_srs: and what does showtrans say?
+ <gnu_srs> showtrans /proc; <empty>
+ <gnu_srs> which log file to look for?
+ <youpi> the installation log, somewhere in /var/log probably
+ <gnu_srs> I only find /proc in /var/log/installer/syslog, mainly printing
+ out errors not finding /proc/mounts
+ <youpi> iirc the /proc translator should be set during the hurd package
+ configuration
+ <youpi> you should probably look for that part in the log
+ <youpi> Setting up translators: /hurd/exec /hurd/proxy-defpager
+ /hurd/pflocal (+link) /hurd/pfinet (+link) (+link) /hurd/procfs -c
+ /hurd/password crash-kill crash-suspend crash-dump-core crash.
+ <youpi> that part
+ <gnu_srs> debootstrap: /hurd/procfs -c and in-target: /hurd/procfs -c No
+ errors
+ <youpi> I don't understand what that means
+ <youpi> please explain in more details
+ <gnu_srs> see: http://paste.debian.net/41195/
+ <youpi> makes much more sense :)
+ <gnu_srs> Where is the 'Setting up translators' done? I cannot find
+ anything in /var/lib/dpkg/info/hurd* or /etc/init.d/...
+ <pinotree> /usr/lib/hurd/setup-translators, called in hurd.postinst
+ <gnu_srs> tks:)
+ <gnu_srs> Hi, when installing a new image with debootstrap to /chroot the
+ script boot/servers.boot is already there (as well as in /boot/ + grub)
+ <gnu_srs> Is it OK to use that file to boot the subhurd?
+ <gnu_srs> using /boot/servers.boot or /chroot/boot/servers.boot (if the
+ /chroot partition is unmounted it cannot be used?)
+ <gnu_srs> and how to unmount /chroot: umount does not work?
+ <gnu_srs> braunr: I'm also trying to find out what's wrong with glibc, when
+ my subhurd is up and running 2.13-39 (if possible)
+ <gnu_srs> I know I should issue settrans command, but I'm not yet fluent in
+ translators.
+ <gnu_srs> sorry:-/
+ <gnu_srs> Now this, after a reboot: unknown code P 30 while trying to open
+ /dev/hd0s3 (/chroot)
+ <gnu_srs> Disk write protected: use the -n option to do a read-only check
+ of the device.
+ <gnu_srs> fsysopts /dev/&hd0s1 --writable: Operation not supported??
+ <gnu_srs> OK, I'm giving up for now, no subhurd:-( and a broken install.
+ <gnu_srs> Which terminal to use in rescue mode, TERM is not set,
+ dumb,mach,hurd does not work with nano?
+ <gnu_srs> e2fsck /dev/ho0s3; e2fsck: Unknown code P 2 while trying to open
+ /dev/ho0s3; Possibly non-existent device?
+ <gnu_srs> mke2fs /dev/hd0s3; /dev/hd0s3 is not a block special device.;
+ Proceed anyway? (y,n) n: What's going on (hd0s3 not mounted)??
+ <gnu_srs> anybody, help?
+ <gnu_srs> after removing and creating the partition again:mke2fs
+ /dev/hd0s3, <same>, mke2fs: Unknown code P 13 while trying to determine
+ filesystem size: What's going on?
+ <gnu_srs> Where to find the glibc-2.13 versions which used to be at
+ debian-ports?.
+ <gnu_srs> seems they can be found on snapshot.debian.org
+
+
+#### IRC, freenode, #hurd, 2013-09-17
+
+ <gnu_srs> teythoon: Installing subhurd via debootstrap on partition
+ /chroot fails miserably. Install hangs, and after reboot \rm -r
+ /chroot/* fails for dev and proc
+ <gnu_srs> Are there translators running there already? I have not
+ booted the subhurd.
+ <gnu_srs> translators for hd0s3 (/chroot) are storeio and
+ ex2fs.static. Do I have to stop them to be able to clean out
+ /chroot?
+ <gnu_srs> mount -v /chroot; settrans -a /chroot /hurd/ext2fs
+ /dev/hd0s3;
+ <gnu_srs> ext2fs: /dev/hd0s3: panic: main: device too small for
+ superblock (0 bytes);
+ <gnu_srs> mount: cannot start translator /hurd/ext2fs: Translator
+ died
+ <gnu_srs> Please, somebody!
+ <gnu_srs> don't ask to ask, just ask, right?
+ <braunr> we've already told you everything you need
+ <braunr> just get it right
+ <braunr> for example, i told you to be careful about fstab so that
+ the subhurd wouldn't use the main hurd partition
+ <braunr> but you managed to screw that
+ <braunr> good job
+ <gnu_srs> I installed the subhurd in a partition /chroot /dev/hd0s3
+ using debootstrap
+ <braunr> i don't know deboostrap, it may be broken, use the disk
+ image youpi maintains
+ <gnu_srs> ant the install screwed up with debootstrap
+ <gnu_srs> ok; then I cannot use a partition, but another disk in
+ kvm, e.g. hdb?
+ <braunr> gnu_srs: hd1
+ <gnu_srs> something is fishy with glibc, definitely, that's why I'm
+ trying to set up a subhurd to revert to 2.13-39
+ <gnu_srs> hi, when trying to boot a subhurd: /hurd/ext2fs.static:
+ hd0s3: Gratuitous error; bye
+ <braunr> gnu_srs: why hd0s3 ?
+ <braunr> it should be hd1s1
+ <gnu_srs> I'm still using a separate partition /my_chroot
+ /hd0s3. Will switch to hd1 next. teythoon?
+ <gnu_srs> the servers.boot script use absolute
+ paths:/hurd/ext2fs.static and /lib/ld.so.1 /hurd/exec,
+ <gnu_srs> shouldn't they be relative to /my_chroot?
+ <braunr> no
+ <braunr> they're actually from your host
+ <gnu_srs> teythoon: please, how did you succeed to boot a subhurd
+ in a partition?
+ <gnu_srs> using debootstrap
+ <teythoon> gnu_srs: from my shell history:
+ <teythoon> : 1374672426:0;debootstrap sid /mnt
+ http://http.debian.net/debian/
+ <teythoon> : 1374673020:0;cp /etc/hosts /etc/resolv.conf /mnt/etc
+ <teythoon> : 1374673048:0;cp /etc/passwd /etc/shadow /mnt/etc
+ <braunr> teythoon: so it does work fine ?
+ <braunr> great
+ <teythoon> yes, why wouldn't it?
+ <teythoon> gnu_srs: I then remounted that partition r/o and used
+ the servers.boot file from the wiki to boot it
+ <teythoon> braunr: why wouldn't it? (you do mean the debootstrap
+ part, don't you?)
+ <braunr> teythoon: i don't know
+ <braunr> i've heard it wasn't maintained any more
+ <braunr> not being maintained is a good reason for something to
+ become unusable/untrustable with time
+ <teythoon> o_O it is at the heart of d-i, isn't it?
+ <teythoon> I actually do most Debian installations using
+ debootstrap directly
+ <braunr> ah
+ <braunr> ok :)
+ <braunr> teythoon: even hurd ones ?
+ <teythoon> braunr: well, just the subhurd installation, but that
+ went as expected
+ <braunr> good
+ <gnu_srs> Finally: I found the reason for Gratuitous error, I used
+ the /boot/servers.boot script,
+ <gnu_srs> that being different to the one on the wiki:-/
+ <gnu_srs> Is it possible to copy files between a host hurd and
+ subhurd, what about access to eth0?
+ <gnu_srs> Hi, when starting the subhurd I see some warnings/error:
+ http://paste.debian.net/41963/
+ <gnu_srs> 1) A spelling error execunable-> executable
+ <gnu_srs> 2) libports: invalid destination port
+ <gnu_srs> 3) mach-defpager: another already running
+ <pinotree> "execunable" is not a typo, but just "exec" and "unable
+ ..." without a space-type character
+ <gnu_srs> OK, sounds more plausible
+ <gnu_srs> Ah, the printouts are mixed, no bug
+ <gnu_srs> When setting up nework in the subhurd: /hurd/pfinet:
+ file_name_lookup /dev/eth0: Translator died
+ <gnu_srs> /hurd/pfinet: device_open(/dev/eth0): (os/device) no such
+ device
+ <gnu_srs> settrans: /hurd/pfinet: Translator died
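+
+For reference, the debootstrap variant described above, as a sketch (assuming
+a spare partition is already mounted read-write on `/mnt`):
+
+    debootstrap sid /mnt http://http.debian.net/debian/
+    cp /etc/hosts /etc/resolv.conf /mnt/etc
+    cp /etc/passwd /etc/shadow /mnt/etc
+    # then remount /mnt read-only and boot it with the servers.boot
+    # script shown above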
+
+
+#### IRC, freenode, #hurd, 2013-09-18
+
+ <youpi> priority does not matter much
+ <youpi> memory manager is not really surprising, there's indeed already one
+ <youpi> what is actually the problem?
+ <gnu_srs> So these are merely warnings?
+ <youpi> gnu_srs: yes
+ <gnu_srs> Real problems are I cannot set up networking, e.g. wget ...:
+ Connecting to ... failed: Address family not supported by protocol.
+ <youpi> gnu_srs: did you give the subhurd a network card?
+ <gnu_srs> How?
+ <gnu_srs> and do I need to set up fstab, for now it's empty.
+ <gnu_srs> I just installed the base with dbootstrap
+ <youpi> gnu_srs: -f option of boot
+ <youpi> e2fsck will need fstab for sure
+ <youpi> otherwise it can't divine what should be checked
+ <gnu_srs> Why is the /boot/servers.boot different from the subhurd one on
+ the wiki? Is it used at all, I thought grub was in charge.
+ <youpi> it's not used at all
+ <gnu_srs> maybe better to put in the subhurd one there then, with a
+ comment?
+ <youpi> no, since /boot/servers.boot is supposed to be used for machine
+ boot
+ <youpi> not subhurd boot
+ <gnu_srs> what about putting a copy of the suhurd one there, with a
+ different name?
+ <youpi> probably a good idea, yes
+ <youpi> matter of making it happen
+ <gnu_srs> the wiki page on subhurd does not say how to set up networking,
+ only that you can do it.
+ <youpi> matter of adding the information
+ <youpi> I remember it's the -f option of boot
+ <youpi> make it work, and add the information for others
+ <gnu_srs> I could try, but don't know how to add a network card to the
+ subhurd, and e.g. how to set up swap
+ <youpi> see -f option
+ <gnu_srs> of boot?
+ <youpi> "gnu_srs: -f option of boot"
+ <youpi> if you could read what we write, that'd make things happen way
+ faster
+ <gnu_srs> yes I saw your comment above, it was just to be 100% sure:-D
+ <gnu_srs> device_file=/dev/eth0 or something else?
+ <gnu_srs> eth0 is used by the host already
+ <youpi> did you read boot --help ?
+ <youpi> iirc it's not a problem, both will receive all frames
+ <gnu_srs> yes I did
+ <youpi> then I don't see where you took device_file from
+ <youpi> at least in that form
+ <youpi> --device=device_name=device_file
+ <youpi> that means rather something like --device=foo=bar
+ <gnu_srs> so -f /dev/eth0 is correct usage then?
+ <youpi> didn't you see that in what I wrote, there was a "=" in there?
+ <gnu_srs> -f is the short option, --device is the long, I don't see the
+ need for = in the short option?
+ <youpi> in the long option there are *two* =
+ <gnu_srs> yes, but in the short no?
+ <youpi> why not?
+ <youpi> long -> short usually drops one =
+ <gnu_srs> to summarize: -f=/dev/eth0 or --device=eth_sub=/dev/eth0?
+ <youpi> why shouldn't there be a eth_sub in the short version?
+ <gnu_srs> 10:15:49) youpi: long -> short usually drops one =
+ <youpi> yes, it drops the =
+ <youpi> but nothing else
+ <youpi> if the long option needs some information, the short needs it too?
+ <youpi> -?
+ <gnu_srs> correct now? -f eth_sub=/dev/eth0 or --device=eth_sub=/dev/eth0?
+ <youpi> yes
+ <gnu_srs> k!
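+
+To summarize: the `-f`/`--device` option of `boot` maps a device name, as
+seen by the subhurd, to a device file of the host. A sketch of giving the
+subhurd network access (`eth_sub` and `hd1s1` are the example names used
+above):
+
+    # the subhurd sees a network device named eth_sub,
+    # backed by the host's /dev/eth0
+    boot -f eth_sub=/dev/eth0 servers.boot /dev/hd1s1
+
+According to the discussion, sharing the card with the host is not a problem,
+as both instances receive all frames.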
+
+
# Further Info
Read about using a subhurd for [[debugging_purposes|debugging/subhurd]].
diff --git a/hurd/subhurd/discussion.mdwn b/hurd/subhurd/discussion.mdwn
index 6e694677..fac93625 100644
--- a/hurd/subhurd/discussion.mdwn
+++ b/hurd/subhurd/discussion.mdwn
@@ -170,3 +170,13 @@ License|/fdl]]."]]"""]]
<zacts> ah ok
<braunr> in theory, subhurds can run without root privileges
<braunr> (but there are currently a few things that prevent it)
+
+
+## IRC, freenode, #hurd, 2011-06-07
+
+ <zacts> would hurd jails be more powerful than FreeBSD jails? how so?
+ <braunr> not more powerful
+ <braunr> easier to develop
+ <braunr> safer
+ <braunr> perhaps more powerful too, but that entirely depends on the
+ features you want inside
diff --git a/hurd/translator.mdwn b/hurd/translator.mdwn
index d4eaf950..da141dc2 100644
--- a/hurd/translator.mdwn
+++ b/hurd/translator.mdwn
@@ -90,17 +90,21 @@ The [[concept|concepts]] of translators creates its own problems, too:
* [[hello]]
* [[auth]]
* [[exec]]
+* [[proc]]
* [[pfinet]]
+* [[eth-filter]]
* [[pflocal]]
* [[hostmux]]
* [[storeio]]
* [[ext2fs]]
* [[fatfs]]
+* [[ufs]]
* [[magic]]
* [[unionfs]]
* [[nfs]]
* [[symlink]]
* [[firmlink]]
+* [[fifo]]
* ...
@@ -112,6 +116,7 @@ The [[concept|concepts]] of translators creates its own problems, too:
* [[procfs]]
* [[nsmux]]
* [[netio]]
+* [[socketio]]
* [[tarfs]]
* [[gopherfs]]
* [[smbfs]]
@@ -122,7 +127,7 @@ The [[concept|concepts]] of translators creates its own problems, too:
*These Translators are available in the [hurdextras repository](http://savannah.nongnu.org/cvs/?group=hurdextras) but not yet described on this website. They are in varying stages of Development.*
* [jfs](http://www.nongnu.org/hurdextras/#jfs)
-* [httpfs](http://www.nongnu.org/hurdextras/#httpfs)
+* [[httpfs]]
* [memfs](http://www.nongnu.org/hurdextras/#memfs)
* [notice](http://www.nongnu.org/hurdextras/#notice)
* [pith](http://www.nongnu.org/hurdextras/#pith)
diff --git a/hurd/translator/auth.mdwn b/hurd/translator/auth.mdwn
index d9e70ec2..7fd4832c 100644
--- a/hurd/translator/auth.mdwn
+++ b/hurd/translator/auth.mdwn
@@ -1,12 +1,19 @@
-[[!meta copyright="Copyright © 2008 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2008, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation License, Version 1.2 or
any later version published by the Free Software Foundation; with no Invariant
Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
-is included in the section entitled
-[[GNU Free Documentation License|/fdl]]."]]"""]]
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+The *auth server* (or, *authentication server*) implements the Hurd's
+authentication mechanism.
+
+It is started by `/hurd/init`.
+
+
+# Documentation
[[*The_Authentication_Server*|documentation/auth]], the transcript of a talk
about the details of the authentication mechanisms in the Hurd by Wolfgang
diff --git a/hurd/translator/eth-filter.mdwn b/hurd/translator/eth-filter.mdwn
index 36ef4217..b5dc8f8f 100644
--- a/hurd/translator/eth-filter.mdwn
+++ b/hurd/translator/eth-filter.mdwn
@@ -8,20 +8,45 @@ Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
is included in the section entitled [[GNU Free Documentation
License|/fdl]]."]]"""]]
-`eth-filter` is a translator that implements a very simple stateless firewal.
+`eth-filter` is a translator that implements a very simple stateless firewall.
+
# Source
[[source_repositories/incubator]], dde
-# Usage:
+
+# Usage
For instance, to drop any attempt to access port 22:
- settrans -c /dev/eth0f /hurd/eth-filter -i /dev/eth0 -r "not port 22"
+ # settrans -c /dev/eth0f /hurd/eth-filter -i /dev/eth0 -r "not port 22"
+
+This creates a `/dev/eth0f` device, which is the filtered version of
+`/dev/eth0`. One can then use `/dev/eth0f` instead of `/dev/eth0`:
+
+ # settrans /servers/socket/2 /hurd/pfinet -i /dev/eth0f [...]
+
+..., or run `dhclient /dev/eth0f`, or similar.
+
+See also Zheng Da's [[user/zhengda/howto]].
+
+
+# Open Issues
-This creates a /dev/eth0f device, which is the filtered version of /dev/eth0. One can then configure network by hand using /dev/eth0f instead of /dev/eth0:
+## IRC, freenode, #hurd, 2013-07-27
- settrans /servers/socket/2 /hurd/pfinet -i /dev/eth0f ...
+[[!tag open_issue_hurd]]
-or run dhclient /dev/eth0f, etc.
+ <youpi> ok, so as usual we actually *already* have a firewall
+ <youpi> it's the eth-filter translator from zheng da
+ <youpi> it has just never been really pushed forward...
+ <teythoon> good news :)
+ <youpi> well, the bad news is that it probably doesn't support connection
+ tracking
+ <youpi> since it's just bpf
+ <youpi> using the libpcap syntax
+ <teythoon> well, a stateless fw should do for Debian/Hurds needs for now,
+ right?
+ <youpi> yes
+ <youpi> and it does work indeed
diff --git a/hurd/translator/examples.mdwn b/hurd/translator/examples.mdwn
index 867d4935..4947808e 100644
--- a/hurd/translator/examples.mdwn
+++ b/hurd/translator/examples.mdwn
@@ -16,7 +16,7 @@ or [hurd-extras](http://www.nongnu.org/hurdextras/).
cvs -z3 -d:pserver:anonymous@cvs.savannah.nongnu.org:/sources/hurdextras co <modulename>
-* httpfs translator
+* [[httpfs]] translator
<!-- Prevent ikiwiki / Markdown rendering bug. -->
@@ -28,7 +28,7 @@ or
$ cd tmp/
$ ls -l
-* ftpfs translator
+* [[ftpfs]] translator
<!-- Prevent ikiwiki / Markdown rendering bug. -->
@@ -67,13 +67,13 @@ This is not as fast as `tar czvf newfile.tar.gz all my files`, but at least it's
$ settrans -fgca /servers/socket/2 /hurd/pfinet -i <interface> -a <ip address> -m <subnet mask> -g <gateway ip>
-* Console translator -- setting up virtual consoles
+* [[Console]] translator -- setting up virtual consoles
<!-- Prevent ikiwiki / Markdown rendering bug. -->
$ console -d vga -d pc_mouse -d pc_kbd -d generic_speaker /dev/vcs
-* iso9660fs translator -- 'mounting' your cdrom
+* [[iso9660fs]] translator -- 'mounting' your cdrom
<!-- Prevent ikiwiki / Markdown rendering bug. -->
diff --git a/hurd/translator/exec.mdwn b/hurd/translator/exec.mdwn
index 54abba7e..1dc0ea26 100644
--- a/hurd/translator/exec.mdwn
+++ b/hurd/translator/exec.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2009, 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2009, 2012, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -11,4 +12,9 @@ License|/fdl]]."]]"""]]
The *exec* server, listening on `/servers/exec`, is responsible for
preparing the execution of processes.
+
+# Open Issues
+
+ * [[open_issues/exec]].
+
* [[open_issues/exec_memory_leaks]].
diff --git a/hurd/translator/ext2fs.mdwn b/hurd/translator/ext2fs.mdwn
index 20faed5e..e2f6b044 100644
--- a/hurd/translator/ext2fs.mdwn
+++ b/hurd/translator/ext2fs.mdwn
@@ -179,6 +179,69 @@ small backend stores, like floppy devices.
That would be a nice improvement, but only after writeback throttling is implemented.
+## Stripped vs. Unstripped `ext2fs.static`
+
+[[!tag open_issue_hurd]]
+
+
+### IRC, freenode, #hurd, 2013-09-17
+
+ <teythoon> I always had some trouble with dropping a rebuild ext2fs.static
+ into my test system and I never figured out why
+ <teythoon> I just followed a hunch and stripped the binary, and all of the
+ sudden it works
+ <teythoon> any ideas why?
+ <tschwinge> teythoon: I quick search found me:
+ <https://savannah.gnu.org/bugs/?8497> and
+ <http://news.gmane.org/find-root.php?message_id=%3c4090243E.2040605%40comcast.net%3e>.
+ <teythoon> tschwinge: ugh, thanks for the pointers ;)
+ <tschwinge> teythoon: They won't help too much I fear. Anyway, good
+ intuition (or whatever) ;-) that you found this out.
+ <tschwinge> teythoon: Not exactly related to stripped/unstripped per se
+ (that is, debug information), but in the past we've had an issue about
+ relro (see binutils/glibc, <http://www.airs.com/blog/archives/189>),
+ where a variable (that erroneously happend to be in such a read-only
+ section, if I remember correct) was tried to be modified -- which worked
+ "sometimes": depending on where exactly it was located in the binary
+ (which shifted around a page
+ <tschwinge> boundary by stripped/unstripped), it'd segfault or not. Burnt
+ several days on that before Samuel (IIRC) eventually figured it out.
+ <teythoon> tschwinge: well, thanks anyway ;)
+
+
+## Increased Memory Consumption
+
+### IRC, freenode, #hurd, 2013-09-18
+
+ <braunr> ext2fs is using a ginormous amount of memory on darnassus since i
+ last updated the hurd package :/
+ <braunr> i wonder if my ext2fs large store patches rework have introduced a
+ regression
+ <braunr> the order of magnitude here is around 1.5G virtual space :/
+ <braunr> it used to take up to 3 times less before that
+ <braunr> looks like my patches didn't make it into the latest hurd package
+ <braunr> teythoon: looks like there definitely is a new leak in ext2fs
+ <teythoon> :/
+ <braunr> memory only
+ <braunr> the number of ports looks stable relative to file system usage
+ <teythoon> braunr: I tested my patches on my development machine, it's up
+ for 14 days (yay libvirt :) and never encountered problems like this
+ <braunr> i've been building glibc to reach that state
+ <teythoon> hm, that's a heavy load indeed
+ <teythoon> could be the file name tracking stuff, I tried to make sure that
+ everything is freed, but I might have missed something
+ <braunr> teythoon: simply running htop run shows a slight, regular increase
+ in physical memory usage in ext2fs
+ <pinotree> old procfs stikes again? :)
+ <teythoon> braunr: I see that as well... curious...
+ <braunr> 16:46 < teythoon> could be the file name tracking stuff, I tried
+ to make sure that everything is freed, but I might have missed something
+ <braunr> how knows, maybe completely unrelated
+ <teythoon> the tracking patch isn't that big, I've gone over it twice today
+ and it still seems reasonable to me
+ <braunr> hm
+
+
# Documentation
* <http://e2fsprogs.sourceforge.net/ext2.html>
diff --git a/hurd/translator/fifo.mdwn b/hurd/translator/fifo.mdwn
new file mode 100644
index 00000000..857922fc
--- /dev/null
+++ b/hurd/translator/fifo.mdwn
@@ -0,0 +1,48 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+The *fifo* translator implements named pipes (FIFOs).
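+
+A minimal usage sketch: on the Hurd, `mkfifo` records `/hurd/fifo` as the
+node's passive translator, which `showtrans` makes visible; the translator
+is started on first access and should terminate again once the FIFO is no
+longer used (see the issue discussed below).
+
+    $ mkfifo /tmp/fifo
+    $ showtrans /tmp/fifo
+    /hurd/fifo
+    $ echo hello > /tmp/fifo &   # the writer blocks until a reader appears
+    $ cat /tmp/fifo
+    hello
+    $ rm /tmp/fifo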
+
+
+# Open Issues
+
+## Not Terminating
+
+[[!tag open_issue_hurd]]
+
+
+### IRC, OFTC, #debian-hurd, 2013-07-28
+
+ <gg0> seems fifos started dying, as they should. am i wrong?
+ <gg0> ( http://bugs.debian.org/629184 )
+ <azeem> so you're saying the bug should be closed?
+ <azeem> best to comment on the bug then
+ <gg0> i didn't hear anyone working on it, so i'm a bit surprised
+ <azeem> could be due to lower-level fixes to glibc or so
+ <gg0> and given often(:|) i'm wrong, i was asking
+ <pinotree> in two years there have been various changes in glibc and hurd
+ <pinotree> (for example the switch to pthreads)
+ <gg0> yeah seems fixed. mknod'ing one then removing it, doesn't leave any
+ process around
+ <gg0> cool
+ <azeem> then please follow-up on the bug and/or close it
+ <gg0> sure
+ <gg0> the pleasure of closing it/them is yours
+ <gg0> great job, whatever you did :)
+
+
+### IRC, OFTC, #debian-hurd, 2013-07-29
+
+ * gg0 wonders if it can close savannah one as
+      well https://savannah.gnu.org/bugs/?17128
+ <pochu> gg0: wdym?
+ <pochu> gg0: got an example?
+ <gg0> http://bugs.debian.org/629184
+ <gg0> i didn't close it myself
diff --git a/hurd/translator/firmlink.mdwn b/hurd/translator/firmlink.mdwn
index 038879db..b53396b0 100644
--- a/hurd/translator/firmlink.mdwn
+++ b/hurd/translator/firmlink.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -20,3 +20,15 @@ License|/fdl]]."]]"""]]
<braunr> infinity0: firmlinks
<infinity0> ah thanks i'll look that up
<kilobug> braunr: oh, true, I forgot about that one
+
+
+# Open Issues
+
+ * [[!GNU_Savannah_bug 29809]]
+
+ * IRC, freenode, #hurd, 2013-07-07
+
+ <youpi> ok, found the firmlink-mv issue
+ <youpi> will commit that
+ <pinotree> \o/
+ <youpi> (bit of mach_print in translators, took just a few hours)
diff --git a/hurd/translator/hostmux.mdwn b/hurd/translator/hostmux.mdwn
index 5fab2dc5..ef16505b 100644
--- a/hurd/translator/hostmux.mdwn
+++ b/hurd/translator/hostmux.mdwn
@@ -29,3 +29,18 @@ When <code>**/ftp**</code> is accessed, the first directory is interpreted as ho
You can see the new created translator in the process list: <code>**ps ax | grep ftpsfs**</code> . You shoud see <code>**/hurd/ftpfs / ftp.yourhost.com**</code> .
-- [[Main/PatrickStrasser]] - 13 Jul 2004
+
+
+# Open Issues
+
+## IRC, freenode, #hurd, 2013-09-21
+
+[[!tag open_issue_hurd]]
+
+ <jproulx> ls /http://<ip>:<port>/
+ <jproulx> the image came with a global translator though I see it doesn't
+ grokk the alternate port notation.
+ <youpi> oh right
+ <jproulx> I shall return to the fine documentation
+ <youpi> it's a hostmux, it doesn't understand ports
+ <youpi> damn, one thus can't url plain urls with that scheme
diff --git a/hurd/translator/httpfs.mdwn b/hurd/translator/httpfs.mdwn
new file mode 100644
index 00000000..dc4a62f7
--- /dev/null
+++ b/hurd/translator/httpfs.mdwn
@@ -0,0 +1,100 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+`httpfs` is a virtual filesystem allowing you to access web pages and files.
+
+
+# Source
+
+<http://www.nongnu.org/hurdextras/#httpfs>
+
+
+# Documentation
+
+## IRC, freenode, #hurd, 2013-09-03
+
+[[!tag open_issue_documentation]]
+
+ <congzhang> hi, why I can't cd to /http:/richtlijn.be/~larstiq/hurd/ to do
+ <congzhang> grep?
+ <pinotree> this is not ftp
+ <congzhang> it works for other file
+ <pinotree> ?
+ <congzhang> I can't cd to ~larstiq, I don't know why
+ <pinotree> http is not a filesystem protocol
+ <pinotree> while httpfs could try in representing it as such, it is not
+ something reliable
+ <congzhang> ok, it's not reliable
+ <congzhang> I expect it can expose dir like browser
+ <congzhang> so, the translator just know href from home page, and one by
+ one
+ <pinotree> uh?
+ <congzhang> if ...:80/a/b/c.png exits, but not has a href in homepage, so I
+ can't cd to a, right?
+ <pinotree> you are looking things from the wrong point of view
+ <pinotree> a web server can do anything with URLs, including redirecting,
+ URL rewriting and whatever else
+ <congzhang> so, how to understand httpfs's idea?
+ <congzhang> how httpfs list dir?
+ <pinotree> check its code
+ <congzhang> en, no need it's not reliable
+ <congzhang> it's not work, it's enough
+ <congzhang> I have an idea, for the file system, we explore dir level by
+ level, but for http, we change full path one
+ <congzhang> once time
+ <congzhang> maybe can allow user to cd any directory, and just mark as some
+ special color to make user know the translator was not sure, file exist
+ or not
+ <congzhang> once the file exits, mark all the parent directory as normal
+ color?
+ <rekado> congzhang: you can find more info about httpfs here:
+ http://nongnu.org/hurdextras/
+ <pinotree> congzhang: you're still looking at http from the wrong point of
+ view
+ <pinotree> there are no directories nor files
+ <pinotree> you start a request for a URL, and you get some content back (in
+ case there's no error)
+ <congzhang> you mean httpfs just for kidding?
+ <pinotree> that the content is a web page listing a directory on the
+ filesystem of the web server machine, or a file sent back via the
+ connection, or a complex web page, it's the same
+ <rekado> congzhang: you can only get a list of files if the web server
+ responds with an index of files
+ <pinotree> "files"
+ <rekado> The readme explains how httpfs does its thing:
+ http://cvs.savannah.gnu.org/viewvc/*checkout*/httpfs/README?root=hurdextras
+ <congzhang> if I can't cd to /http:/host/a/b how to get
+ /http:/host/a/b/c.html, even the file exist?
+ <pinotree> you don't cd in http
+ <pinotree> cd is for changing directory, which does not apply to a protocol
+ like http which is not fs-oriented
+ <congzhang> yes, I agree with you, http was not fs-oriented
+ <congzhang> so httpfs was not so useful
+ <rekado> You can access the document directly, though, can't you?
+ <congzhang> rekado: I try once more
+ <congzhang> I can't
+ <congzhang> so, the httpfs need some extend, http protocol was not fs
+ oriented, so need some extend to make it work with file system
+ <pinotree> http is not designed for file system usage, so extending it is
+ useless
+ <congzhang> or, httpfs was not so useful
+ <pinotree> there are many other protocols for file systems
+ <congzhang> I don't think so
+ <pinotree> i do
+ <congzhang> if we can't make it more useful, remove it from hurd rep, or
+ extend it more useful
+ <congzhang> add some more rule, to make it work with file system
+ <pinotree> no
+ <congzhang> some paradox in it
+ <pinotree> which paradox?
+ <congzhang> for http vs file system
+ <pinotree> ???
+ <congzhang> tree oriented and star topology oriented?
+ <pinotree> you don't make any sense
diff --git a/hurd/translator/netio.mdwn b/hurd/translator/netio.mdwn
index 12a3f55c..885b1cc0 100644
--- a/hurd/translator/netio.mdwn
+++ b/hurd/translator/netio.mdwn
@@ -11,15 +11,16 @@ License|/fdl]]."]]"""]]
`netio` is a translator designed for creating socket ports through the
filesystem.
+It is intended to be superseded by the improved [[socketio]] translator.
+
# Source
[[source_repositories/incubator]], netio/master
-This is supposed to be replaced by the better socketio.
# Usage:
-e.g.
+For example:
-cat /tmp/netio/tcp/ftp.gnu.org/21
+ $ cat < ~/tmp/netio/tcp/ftp.gnu.org/21
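+
+The node itself has to be set up first. A hypothetical sketch, assuming
+`netio` is installed as `/hurd/netio` and needs no mandatory arguments
+(unverified -- check the sources in the incubator repository for the actual
+options):
+
+    $ settrans -ca ~/tmp/netio /hurd/netio
+
+After that, opening `tcp/<host>/<port>` underneath the node, as in the
+example above, yields a connected socket.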
diff --git a/hurd/translator/nsmux.mdwn b/hurd/translator/nsmux.mdwn
index d156772b..6b3be79c 100644
--- a/hurd/translator/nsmux.mdwn
+++ b/hurd/translator/nsmux.mdwn
@@ -1,12 +1,12 @@
-[[!meta copyright="Copyright © 2009 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2009, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation License, Version 1.2 or
any later version published by the Free Software Foundation; with no Invariant
Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
-is included in the section entitled
-[[GNU Free Documentation License|/fdl]]."]]"""]]
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
# nsmux
@@ -119,3 +119,24 @@ of the simplest use-case of namespace-based translator selection in
the form of translator `nsmux`. The filter is partially implemented
and this is the immediate goal. Propagating translators down
directories is the next objective.
+
+
+## Open Issues
+
+### IRC, freenode, #hurd, 2013-08-22
+
+[[!tag open_issue_hurd]]
+
+ < youpi> err, is nsmux supposed to work at all?
+ < youpi> a mere ls doesn't work
+ < youpi> I'm running it as a user
+ < youpi> echo * does work though
+ < teythoon> ah, yes, nsmux,,is,,funny :p
+ < youpi> well, perhaps but I can't make it work
+ < youpi> well, the trivial ,,hello does work
+ < youpi> but ,,tarfs doesn't seem to be working for instance
+ < youpi> same for ,,mboxfs
+ < youpi> ,,xmlfs seems to somehow work a bit, but not very far...
+ < youpi> so it seems just nobody is caring about putting READMEs wherever
+ appropriate
+ < youpi> e.g. examples in socketio/ ...
diff --git a/hurd/translator/pfinet.mdwn b/hurd/translator/pfinet.mdwn
index f6f69ea4..bf535b21 100644
--- a/hurd/translator/pfinet.mdwn
+++ b/hurd/translator/pfinet.mdwn
@@ -1,5 +1,5 @@
-[[!meta copyright="Copyright © 2002, 2004, 2005, 2007, 2008, 2011 Free Software
-Foundation, Inc."]]
+[[!meta copyright="Copyright © 2002, 2004, 2005, 2007, 2008, 2011, 2013 Free
+Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -33,6 +33,9 @@ installation.
* [[DHCP]].
+ * [[IPv6]].
+
+ * [[eth-filter]]: Firewall.
+
* [[Implementation]].
- * [[IPv6]].
diff --git a/hurd/translator/pfinet/implementation.mdwn b/hurd/translator/pfinet/implementation.mdwn
index 9bcf62ef..3e66c870 100644
--- a/hurd/translator/pfinet/implementation.mdwn
+++ b/hurd/translator/pfinet/implementation.mdwn
@@ -27,6 +27,170 @@ implementation.
<youpi> oh
<braunr> http://jl-icase.home.comcast.net/~jl-icase/LinuxTCP2.html
+## IRC, freenode, #hurd, 2013-09-03
+
+In the context of the item on [[/contributing]].
+
+ <rekado> About this task: "Make pfinet OK with the ethernet device going
+ away." --- how can I test this? How can I remove the ethernet device?
+ <pinotree> settrans on the ethernet device, handled by the netdde
+ translator
+ <pinotree> that is, make it go away (settrans -fg)
+ <rekado> Ah, I see.
+ <rekado> Thanks
+ <pinotree> check its status before with showtrans
+ <pinotree> then, after having made it go away, set it again
+ <rekado> I don't think I'm doing this right... After `settrans -fg
+ /dev/eth0` I should not be able to access the network anymore, but it
+ still works.
+ <rekado> How can I figure out which of the four network devices is actually
+ used?
+ <braunr> rekado: the file system is used to open files, i.e. access
+ services
+ <braunr> it's not used to revoke access
+ <braunr> once pfinet has obtained a port to the network device, it keeps it
+ <rekado> oh, yes, of course. Sorry, this is all very
+ new to me.
+ <rekado> I'm not sure what the problem is that this task describes. In
+ what way is pfinet "not OK" with the ethernet device going away?
+ <braunr> rekado: the idea is to make pfinet able to cope with a driver
+ crash
+ <rekado> Can I trigger a driver crash for test purposes? (Or do I have to
+ build a purposefully broken driver first?)
+ <braunr> use kill
+ <rekado> Oh, good.
+ <braunr> iirc, netdde doesn't restart correctly :x
+ <braunr> you'll probably have to fix it a bit
+ <braunr> i guess there is some persistent state that prevents it from
+ reinitializing correctly
+ <rekado> okay
+ <rekado> I may need one more pointer: where can I find the netdde code?
+ Grep'ing around I only see it only mentioned as an argument to
+ /hurd/devnode; also: should I work in some incubator branch or directly
+ in the hurd repo?
+ <braunr> rekado: incubator branch
+ <rekado> Okay. Thank you for your patience. I'll play with this in the
+ next few days.
+ <braunr> enjoy
+ <rekado> :)
+
+
+### IRC, freenode, #hurd, 2013-09-05
+
+ <rekado> When I kill the /hurd/netdde process I can no longer access the
+ network (as expected);
+ <rekado> To restore connectivity I run "settrans -g eth0 /hurd/devnode -M
+ /dev/netdde eth0" from the /dev directory.
+ <rekado> When I access the network again everything is fine. (I do see a
+ message telling me "irq handler 11: release an dead delivery port"
+ <rekado> )
+ <rekado> Is it the goal to avoid having to run settrans again to run netdde
+ after it crashes or is killed?
+ <youpi> you don't need to run settrans again
+ <youpi> that should get triggered automatically
+ <rekado> Hmm, after killing netdde I get "Resource lost" when using wget.
+ <rekado> It doesn't seem to be restarted automatically.
+ <youpi> try again
+ <youpi> the first wget makes pfinet try to use netdde and fail, thus crash
+ <youpi> the second wil respawn pfinet
+ <youpi> ideally pfinet shouldn't die, that's a TODO mentioned in the
+ "contributing page"
+ <rekado> Ah, so that's what should be prevented.
+ <youpi> it's just a matter of making pfinet be fine with errors from the
+ eth translator, and simply reopen it instead of dying
+ <rekado> That's the thing I've been trying to figure out.
+ <rekado> when I run wget a second (or third) time I get a different error;
+ "Name or service not known."
+ <rekado> It's only okay again when I use settrans
+ <youpi> maybe the devnode translator also needs some fixing
+ <youpi> it's odd that I don't have the issue though
+ <rekado> I'm using the qemu image, updated just yesterday.
+ <youpi> same here
+ <youpi> anyway, now you know where to put your hands :)
+ <rekado> yes, thanks a lot.
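+
+The test cycle described above, as a sketch (device names as in the
+discussion; run as root; whether the manual `settrans` is still needed
+depends on the devnode/pfinet fixes discussed below):
+
+    # simulate a driver crash
+    kill <pid of /hurd/netdde>     # look the PID up with ps
+    wget http://example.org/       # fails, and also makes pfinet crash
+    # rekado's workaround to re-attach the device node, run from /dev:
+    cd /dev
+    settrans -g eth0 /hurd/devnode -M /dev/netdde eth0
+    wget http://example.org/       # network access should be back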
+
+
+### IRC, freenode, #hurd, 2013-09-07
+
+ <rekado> in pfinet/ethernet.c:ethernet_open there's an assertion:
+ edev->ether_port == MACH_PORT_NULL
+ <rekado> This is violated when netdde was killed and the device is
+ reopened.
+ <rekado> I'm not sure what should be done: destroy the port before
+ reopening or drop the assertion?
+ <rekado> If I drop the assertion, Mach seems to handle this just fine.
+ <rekado> Says "irq handler 11: release an [sic] dead delivery port" and
+ then carries on without problems.
+ <rekado> Is this a warning or an error, or can this be ignored?
+ <rekado> (or none of the above?)
+
+
+### IRC, freenode, #hurd, 2013-09-08
+
+ <rekado> I have a simple patch for pfinet that lets it recover from an
+ error in ethernet_xmit when /hurd/netdde and /hurd/devnode have been
+ killed.
+ <rekado> It doesn't work, though, when only netdde has been killed.
+ <rekado> With devnode still around device_open fails with "(ipc/send)
+ invalid destination port"
+ <rekado> I don't know where device_open is defined and why this error is
+ returned.
+ <rekado> I guess the error refers to the "master_device" port returned by
+ file_name_lookup() in ethernet_open()
+ <rekado> Why would file_name_lookup() return an invalid port when netdde is
+ dead but devnode is still running?
+ <braunr> rekado: maybe because devnode needs to perform a fresh lookup as
+ well
+
+
+### IRC, freenode, #hurd, 2013-09-09
+
+ <rekado> braunr: re devnode: devnode only performs a single lookup in
+ parse_opt(), i.e. at start-up.
+ <rekado> I'll try to understand devnode enough to patch it.
+ <braunr> rekado: that's the problem
+ <braunr> it should perform a lookup every time it's opened
+
+[[!message-id "1378730237-8091-1-git-send-email-rekado@elephly.net"]],
+[[!message-id "1378731824-8928-1-git-send-email-rekado@elephly.net"]].
+
+ <rekado> I submitted two patches to the mailing list. I've tested them on
+ Debian GNU/Hurd but based them on the incubator/dde branch.
+ <teythoon> rekado: awesome, reliability fixes are very much welcome
+
+
+### IRC, freenode, #hurd, 2013-09-18
+
+ <rekado> youpi: my apologies for the delay in getting back to you with
+ improvements to my pfinet/devnode patches. Been very busy.
+ <braunr> rekado: development pace on the hurd has always been slow, no need
+ to apologize
+
+## MAC Addresses
+
+[[!tag open_issue_hurd]]
+
+
+### IRC, freenode, #hurd, 2013-09-21
+
+ <jproulx> what command will show me the MAC address of an interface?
+ <youpi> ah, too bad inetutils-ifconfig doesn't seem to be showing it
+ <youpi> I don't think we already have a tool for that
+ <youpi> it would be a matter of patching inetutils-ifconfig
+
+
+## Routing Tables
+
+[[!tag open_issue_hurd]]
+
+
+### IRC, freenode, #hurd, 2013-09-21
+
+ <jproulx> Hmmm, OK I can work around that, what about routing tables, can I
+ see them? can I add routes besides the pfinet -g default route?
+ <youpi> I don't think there is a tool for that yet
+ <youpi> it's not plugged inside pfinet anyway
+
# Reimplementation, [[!GNU_Savannah_task 5469]]
@@ -58,3 +222,6 @@ implementation.
<youpi> I used it for the stubdomains in Xen
<youpi> (it = lwip)
<braunr> ok
+
+Cloudius Systems' OSv has apparently isolated and reused a BSD networking
+stack, <http://www.osv.io/>, <https://github.com/cloudius-systems/osv>.
diff --git a/hurd/translator/pflocal.mdwn b/hurd/translator/pflocal.mdwn
index dc2434dc..fdcc39f1 100644
--- a/hurd/translator/pflocal.mdwn
+++ b/hurd/translator/pflocal.mdwn
@@ -1,13 +1,35 @@
-[[!meta copyright="Copyright © 2000, 2008 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2000, 2008, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation License, Version 1.2 or
any later version published by the Free Software Foundation; with no Invariant
Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
-is included in the section entitled
-[[GNU Free Documentation License|/fdl]]."]]"""]]
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
The implementation of the `pflocal` server is in the `pflocal` directory, and
uses [[`libpipe`|libpipe]] (shared code with the [[named_pipe|fifo]]
implementation).
+
+
+# Open Issues
+
+## `SO_REUSEADDR`
+
+### IRC, freenode, #hurd, 2013-09-19
+
+ <gnu_srs> Hi, is SO_REUSEADDR supported at all on Hurd? I can only find two
+ entries:
+ <gnu_srs> in libdde-linux26 and pfinet/linux-src, and the functionality
+ seems to be unimplemented.
+ <pinotree> gnu_srs: pfinet supports it
+ <youpi> gnu_srs: grep talks about pfinet/linux-src/net/core/sock.c:
+ case SO_REUSEADDR:
+ <youpi> two times
+ <gnu_srs> Yes, and that is the implementation?
+ <gnu_srs> I wrote a test for AF_INET and it works, but not for AF_UNIX
+ (maybe not so interesting case).
+ <pinotree> pflocal does not support it
+ <gnu_srs> Is that of interest at all?
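+
+A hedged sketch of the kind of test mentioned above: set `SO_REUSEADDR` and
+read it back, once on an `AF_INET` socket (handled by pfinet) and once on an
+`AF_UNIX` socket (handled by pflocal).  The exact error pflocal returns is
+not recorded in the log.
+
+    /* Hedged test sketch for SO_REUSEADDR on both socket families.  */
+    #include <stdio.h>
+    #include <sys/socket.h>
+    #include <unistd.h>
+
+    static void
+    check (int family, const char *name)
+    {
+      int fd = socket (family, SOCK_STREAM, 0);
+      int on = 1, val = 0;
+      socklen_t len = sizeof val;
+
+      if (fd < 0)
+        {
+          printf ("%s: socket: %m\n", name);
+          return;
+        }
+      if (setsockopt (fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof on) < 0)
+        printf ("%s: setsockopt (SO_REUSEADDR): %m\n", name);
+      else if (getsockopt (fd, SOL_SOCKET, SO_REUSEADDR, &val, &len) < 0)
+        printf ("%s: getsockopt (SO_REUSEADDR): %m\n", name);
+      else
+        printf ("%s: SO_REUSEADDR = %d\n", name, val);
+      close (fd);
+    }
+
+    int
+    main (void)
+    {
+      check (AF_INET, "AF_INET");
+      check (AF_UNIX, "AF_UNIX");
+      return 0;
+    }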
diff --git a/hurd/translator/proc.mdwn b/hurd/translator/proc.mdwn
new file mode 100644
index 00000000..d5e0960c
--- /dev/null
+++ b/hurd/translator/proc.mdwn
@@ -0,0 +1,75 @@
+[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+The *proc server* (or, *process server*) implements some aspects of [[Unix]]
+processes.
+
+It is started by `/hurd/init`.
+
+
+# "Unusual" PIDs
+
+[[!tag open_issue_hurd]]
+
+
+## IRC, freenode, #hurd, 2012-08-10
+
+ <braunr> too bad the proc server has pid 0
+ <braunr> top & co won't show it
+
+
+## IRC, OFTC, #debian-hurd, 2012-09-18
+
+ <pinotree> youpi: did you see
+ https://enc.com.au/2012/09/careful-with-pids/'
+ <pinotree> ?
+ <youpi> nope
+
+
+## IRC, OFTC, #debian-hurd, 2013-06-23
+
+ <teythoon> I've got this idea about the pid 1 issue you mentioned
+ <teythoon> can't we just make init pid 1?
+ <teythoon> I mean the mapping is rather arbitrary, we could make our init
+ pid 2 or something and start sysvs init as pid 1
+ <pinotree> not totally sure it is that arbitrary up to the first 4-5 pids
+ <teythoon> y is that?
+ <pinotree> at least i see in hurd's code that /hurd/init is assumed as
+ pid=1
+ <teythoon> hurds init that has to stick around for the fs translator sync?
+ <pinotree> hurd's init does the basic server startup
+ <pinotree> iirc it also takes care of shutdown/reboot
+ <teythoon> that's what I meant
+ <teythoon> and if it wouldn't have to stick around for the translator sync
+ it could just exec sysvinit
+ <teythoon> I just think it's easier to patch hurd than to remove the
+ assumption that init is pid 1 from sysvinit
+
+
+## IRC, freenode, #hurd, 2013-09-13
+
+ <braunr> teythoon: also, as a feature request, i'd like the proc server not
+ to have pid 0, if you have any time to do that
+ <braunr> so it appears in top and friends
+ <teythoon> braunr: noted, that should be easy
+ <teythoon> not using 0 is probably a good thing, many things use pid 0 as
+ something special
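+
+An illustration of why pid 0 is special (not Hurd-specific; this is plain
+POSIX semantics): in `kill` and `waitpid`, a pid of 0 does not name a
+process at all but the caller's process group, so a server that really has
+pid 0 cannot be addressed through such interfaces.
+
+    /* Pid 0 is "my process group", not a particular process.  */
+    #include <signal.h>
+    #include <sys/types.h>
+
+    int
+    try_to_signal_pid_0 (int sig)
+    {
+      /* This does NOT signal a process with pid 0; it signals every
+         process in the caller's process group instead.  */
+      return kill ((pid_t) 0, sig);
+    }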
+
+
+# Process Discovery
+
+## IRC, freenode, #hurd, 2013-08-26
+
+ < teythoon> somewhat related, I do not like the way the proc server just
+ creates processes for new mach tasks it discovers
+ < teythoon> that does not play well with subhurds for example
+ < braunr> teythoon: i agree with you on proc process-to-task mapping
+ < braunr> that's something i intend to completely rework on propel
+ < braunr> in a way similar to how pid namespaces work on linux
diff --git a/hurd/translator/procfs/jkoenig/discussion.mdwn b/hurd/translator/procfs/jkoenig/discussion.mdwn
index d26f05f9..44b8cc77 100644
--- a/hurd/translator/procfs/jkoenig/discussion.mdwn
+++ b/hurd/translator/procfs/jkoenig/discussion.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2010, 2011, 2012 Free Software Foundation,
+[[!meta copyright="Copyright © 2010, 2011, 2012, 2013 Free Software Foundation,
Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
@@ -14,12 +14,13 @@ License|/fdl]]."]]"""]]
[[!toc]]
-# Miscellaneous
+# `/proc/version`
-IRC, #hurd, around September 2010
+[[!taglink open_issue_documentation]]: edit and move to [[FAQ]].
+
+
+## IRC, freenode, #hurd, around 2010-09
- <youpi> jkoenig: is it not possible to provide a /proc/self which points at
- the client's pid?
<pinotree> (also, shouldn't /proc/version say something else than "Linux"?)
<youpi> to make linux tools work, no :/
<youpi> kfreebsd does that too
@@ -33,10 +34,103 @@ IRC, #hurd, around September 2010
<youpi> Linux version 2.6.16 (des@freebsd.org) (gcc version 4.3.5) #4 Sun
Dec 18 04:30:00 CET 1977
<pinotree> k
- <giselher> I had some problems with killall5 to read the pid from /proc, Is
- this now more reliable?
- <youpi> I haven't tested with jkoenig's implementation
- [...]
+
+
+## IRC, freenode, #hurd, 2013-06-04
+
+ <safinaskar> ?@?#@?$?@#???!?!?!?!??!?!?!?! why /proc/version on gnu system
+ reports "Linux version 2.6.1 (GNU 0.3...)"?
+ <braunr> safinaskar: because /proc/version is a linux thing
+ <braunr> applications using it don't expect to see anything else than linux
+ when parsing
+ <braunr> think of it as your web brower allowing you to set the user-agent
+ <safinaskar> braunr: yes, i just thought about user-agent, too
+ <safinaskar> braunr: but freebsd doesn't report it is linux (as well as i
+ know)
+ <braunr> their choice
+ <braunr> we could change it, but frankly, we don't care
+ <safinaskar> so why "uname" says "GNU" and not "Linux"?
+ <braunr> uname is posix
+ <braunr> note that /proc/version also includes GNU and GNU Mach/Hurd
+ versions
+ <safinaskar> if some program read the word "Linux" from /proc/version, it
+ will assume it is linux. so, i think it is bad idea
+ <braunr> why ?
+ <safinaskar> there is no standard /proc across unixen
+ <braunr> if a program reads /proc/version, it expects to be run on linux
+ <safinaskar> every unix implement his own /proc
+ <safinaskar> so, we don't need to create /proc which is fully compatible
+ with linux
+ <braunr> procfs doesn't by default
+ <safinaskar> instead, we can make /proc, which is partially compatible with
+ linux
+ <braunr> debiansets the -c compatibility flag
+ <braunr> that's what we did
+ <safinaskar> but /proc/version should really report kernel name and its
+ version
+ <braunr> why ?
+ <braunr> (and again, it does)
+ <safinaskar> because this is why /proc/version created
+ <pinotree> no?
+ <braunr> on linux, yes
+ <braunr> pinotree: hm ?
+ <safinaskar> and /proc/version should not contain the "Linux" word, because
+ this is not Linux
+ <braunr> pinotree: no to what ? :)
+ <braunr> safinaskar: *sigh*
+ <braunr> i explained the choice to you
+ <pinotree> safinaskar: if you are using /proc/version to get the kernel
+ name and version, you're doing bad already
+ <braunr> disagree if you want
+ <braunr> but there is a point to using the word Linux there
+ <pinotree> safinaskar: there's the proper aposix api for that, which is
+ uname
+ <safinaskar> pinotree: okey. so why we ever implement /proc/version?
+ <braunr> it's a linux thing
+ <braunr> they probably wanted more than what the posix api was intended to
+ do
+ <safinaskar> okey, so why we need this linux thing? there is a lot of
+ linux thing which is useful in hurd. but not this thing. because this
+ is not linux. if we support /proc/version, we should not write "Linux"
+ to it
+ <pinotree> and even on freebsd their linprocfs (mounted on /proc) is not
+ mounted by default
+ <braunr> 10:37 < braunr> applications using it don't expect to see anything
+ else than linux when parsing
+ <braunr> 10:37 < braunr> think of it as your web brower allowing you to set
+ the user-agent
+ <braunr> safinaskar: the answer hasn't changed
+ <safinaskar> pinotree: but they don't export /proc/version with "Linux"
+ word in it anyway
+ <pinotree> safinaskar: they do
+ <safinaskar> pinotree: ??? their /proc/version contain Linux?
+ <pinotree> Linux version 2.6.16 (des@freebsd.org) (gcc version 4.6.3) #4
+ Sun Dec 18 04:30:00 CET 1977
+ <kilobug> safinaskar: it's like all web browsers reporting "mozilla" in
+ their UA, it may be silly, but it's how it is for
+ compatibility/historical reasons, and it's just not worth the trouble of
+ changing it
+ <pinotree> that's on a debian gnu/kfreebsd machine
+ <pinotree> and on a freebsd machine it is the same
+ <braunr> safinaskar: you should understand that parsing this string allows
+ correctly walking the rest of the /proc tree
+ <pinotree> and given such filesystem on freebsd is called "linprocfs", you
+ can already have a guess what it is for
+ <kilobug> safinaskar: saying "Linux version 2.6.1" just means "I'm
+ compatible with Linux 2.6.1 interfaces", like saying "Mozilla/5.0 (like
+ Gecko)" in the UA means "I'm a modern browser"
+ <safinaskar> so, is there really a lot of programs which expect "Linux"
+ word in /proc/version even on non-linux platforms?
+ <braunr> no
+ <braunr> but when they do, they do
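+
+A hedged illustration of the compatibility argument above: a typical
+consumer of `/proc/version` just looks for a Linux-style banner.  Since the
+banner is of the form "Linux version 2.6.1 (GNU 0.3 ...)", such checks keep
+working on the Hurd.
+
+    /* Sketch of what a /proc/version consumer commonly does.  */
+    #include <stdio.h>
+    #include <string.h>
+
+    int
+    looks_like_linux_procfs (void)
+    {
+      char buf[256] = "";
+      FILE *f = fopen ("/proc/version", "r");
+
+      if (f == NULL)
+        return 0;
+      if (fgets (buf, sizeof buf, f) == NULL)
+        buf[0] = '\0';
+      fclose (f);
+      return strncmp (buf, "Linux version ", 14) == 0;
+    }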
+
+
+# `/proc/self`
+
+## IRC, freenode, #hurd, around 2010-09
+
+ <youpi> jkoenig: is it not possible to provide a /proc/self which points at
+ the client's pid?
<pinotree> looks like he did 'self' too, see rootdir_entries[] in rootdir.c
<youpi> but it doesn't point at self
<antrik> youpi: there is no way to provide /proc/self, because the server
@@ -56,10 +150,13 @@ IRC, #hurd, around September 2010
<youpi> it "just" needs to be commited :)
<antrik> in either case, it can't hurt to bring this up again :-)
+[[community/gsoc/project_ideas/mtab/discussion]], *IRC, freenode, #hurd,
+2013-09-07*.
+
# root group
-IRC, #hurd, around October 2010
+## IRC, freenode, #hurd, around October 2010
<pinotree> the only glitch is that files/dirs have the right user as
owner, but always with root group
@@ -67,7 +164,7 @@ IRC, #hurd, around October 2010
# `/proc/[PID]/stat` being 400 and not 444, and some more
-IRC, freenode, #hurd, 2011-03-27
+## IRC, freenode, #hurd, 2011-03-27
<pochu> is there a reason for /proc/$pid/stat to be 400 and not 444 like on
Linux?
@@ -112,7 +209,8 @@ IRC, freenode, #hurd, 2011-03-27
/proc uses rather than rely on CLK_TCK
<jkoenig> (so we can choose whatever reasonable value we want)
-IRC, freenode, #hurd, 2011-03-28
+
+## IRC, freenode, #hurd, 2011-03-28
<antrik> jkoenig: does procfs expose any information that is not available
to everyone through the proc server?...
@@ -165,7 +263,8 @@ IRC, freenode, #hurd, 2011-03-28
<antrik> (though I never got around to look at his buggy code...)
<jkoenig> ok
-IRC, freenode, #hurd, 2011-07-22
+
+## IRC, freenode, #hurd, 2011-07-22
<pinotree> hm, why /proc/$pid/stat is 600 instead of 644 of linux?
<jkoenig> pinotree, it reveals information which, while not that sensitive,
@@ -186,7 +285,7 @@ IRC, freenode, #hurd, 2011-07-22
# `/proc/mounts`, `/proc/[PID]/mounts`
-IRC, freenode, #hurd, 2011-07-25
+## IRC, freenode, #hurd, 2011-07-25
< pinotree> jkoenig: btw, what do you think about providing empty
/proc/mounts and /proc/$pid/mounts files?
@@ -206,17 +305,34 @@ IRC, freenode, #hurd, 2011-07-25
i don't remember)
< pinotree> not a strict need
+See also [[community/gsoc/project_ideas/mtab]].
+
-# `/proc/[PID]/auxv`, `/proc/[PID]/exe`, `/proc/[PID]/mem`
+## IRC, freenode, #hurd, 2013-09-20
+
+ <pinotree> teythoon: should procfs now have $pid/mounts files pointing to
+ ../mounts?
+ <teythoon> pinotree: probably yes
+
+
+# `/proc/[PID]/auxv`
Needed by glibc's `pldd` tool (commit
11988f8f9656042c3dfd9002ac85dff33173b9bd).
-# `/proc/self/exe`
+# `/proc/[PID]/exe`
+
+Needed by glibc's `pldd` tool (commit
+11988f8f9656042c3dfd9002ac85dff33173b9bd).
+
+
+## `/proc/self/exe`
[[!message-id "alpine.LFD.2.02.1110111111260.2016@akari"]]. Needed by glibc's
`stdlib/tst-secure-getenv.c`.
+`HAVE_PROC_SELF_EXE` in `[GCC]/libjava/configure.ac`.
+Also used in `[GCC]/libgfortran/runtime/main.c`:`store_exe_path`.
Is it generally possible to use something like the following instead?
Disadvantage is that every program using this needs to be patched.
@@ -314,32 +430,25 @@ This is used in `[LLVM]/lib/Support/Unix/Path.inc`.
report why the test suite failed
-# `/proc/[PID]/cwd`
+## `/proc/self/maps`
-## IRC, freenode, #hurd, 2012-06-30
-
- * pinotree has a local work to add the /proc/$pid/cwd symlink, but relying
- on "internal" (but exported) glibc functions
+`HAVE_PROC_SELF_MAPS` in `[GCC]/libjava/configure.ac`.
+Also used in `[GCC]/intl/relocatable.c`:`find_shared_library_fullname` for
+`#ifdef __linux__`.
-# "Unusual" PIDs
+# `/proc/[PID]/mem`
-Not actually related to procfs, but here seems to be a convenient place for
-filing these:
-
-
-## IRC, freenode, #hurd, 2012-08-10
+Needed by glibc's `pldd` tool (commit
+11988f8f9656042c3dfd9002ac85dff33173b9bd).
- <braunr> too bad the proc server has pid 0
- <braunr> top & co won't show it
+# `/proc/[PID]/cwd`
-## IRC, OFTC, #debian-hurd, 2012-09-18
+## IRC, freenode, #hurd, 2012-06-30
- <pinotree> youpi: did you see
- https://enc.com.au/2012/09/careful-with-pids/'
- <pinotree> ?
- <youpi> nope
+ * pinotree has a local work to add the /proc/$pid/cwd symlink, but relying
+ on "internal" (but exported) glibc functions
# CPU Usage
diff --git a/hurd/translator/socketio.mdwn b/hurd/translator/socketio.mdwn
index 99a28416..de5cf252 100644
--- a/hurd/translator/socketio.mdwn
+++ b/hurd/translator/socketio.mdwn
@@ -11,15 +11,36 @@ License|/fdl]]."]]"""]]
`socketio` is a translator designed for creating socket ports through the
filesystem.
+This is supposed to replace [[netio]].
+
# Source
[[source_repositories/incubator]], socketio/master
-This is supposed to replace netio.
# Usage:
-e.g.
+For example:
+
+ $ cat < ~/tmp/socketio/tcp/ftp.gnu.org/21
+
+
+# Open Issues
+
+## IRC, freenode, #hurd, 2013-06-30
+
+[[!tag open_issue_hurd]]
-cat /tmp/socketio/tcp/ftp.gnu.org/21
+ <youpi> http://lists.gnu.org/archive/html/bug-hurd/2003-05/msg00069.html
+ <youpi> this was supposed to be much better than our current netio
+ <youpi> (which doesn't really have any documentation btw)
+ <teythoon>
+ http://web.archive.org/web/20060117085538/http://duesseldorf.ccc.de/~moritz/files/socketio.c.gz
+ <teythoon> youpi: socketio looks nice. any reason in particular why you are
+ working on it?
+ <youpi> teythoon: I was looking at the firewall stuff, and wondering about
+ Zheng Da's work, and seen netio, thus wondered "what is it about
+ already?" and found there was no documentation, so dug into the mailing
+ list archives, only to find it was supposed to be precated in favour of
+ socketio, etc. :)
diff --git a/hurd/translator/tmpfs/tmpfs_vs_defpager.mdwn b/hurd/translator/tmpfs/tmpfs_vs_defpager.mdwn
index 5228515f..16a23405 100644
--- a/hurd/translator/tmpfs/tmpfs_vs_defpager.mdwn
+++ b/hurd/translator/tmpfs/tmpfs_vs_defpager.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2010, 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2010, 2011, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -270,3 +271,129 @@ See also: [[open_issues/resource_management_problems/pagers]].
<antrik> only problem is that the current defpager implementation can't
really handle that...
<antrik> at least that's my understanding of the situation
+
+
+# IRC, freenode, #hurd, 2013-07-05
+
+ <teythoon> btw, why does the tmpfs translator have to talk to the pager?
+ <teythoon> to get more control about how the memory is paged out?
+ <teythoon> read lot's of irc logs about tmpfs on the wiki, but I couldn't
+ find the answer to that
+ <mcsim> teythoon: did you read this?
+ http://www.gnu.org/software/hurd/hurd/translator/tmpfs/tmpfs_vs_defpager.html
+ <teythoon> mcsim: I did
+ <mcsim> teythoon: Last discussion, i think has very good point.
+ <mcsim> To provide memory objects you should implement pager interface
+ <mcsim> And if you implement pager interface you are the one who is asked
+ to write data to backing storage to evict them
+ <mcsim> But tmpfs doesn't do this
+ <teythoon> mmm, clients doing mmap...
+ <mcsim> teythoon: You don't have mmap
+ <mcsim> teythoon: mmap is implemented on top of mach interface
+ <mcsim> teythoon: I mean you don't have mmap at this level
+ <teythoon> mcsim: sure, but that's close enough for me at this point
+ <mcsim> teythoon: diskfs interface requires implementor to provide a memory
+ object port (send right)
+ <mcsim> Guest8183: Why tmpfs requires defpager
+ <Guest8183> how did you get to talk about that ?
+ <mcsim> I was just asked
+ <teythoon> Guest8183: it's just so unsettling that tmpfs has to be started
+ as root :/
+ <Guest8183> teythoon: why ?
+ *** Guest8183 (~rbraun@dalaran.sceen.net) is now known as braunr_
+ <teythoon> braunr_: b/c starting translators isn't a privileged operation,
+ and starting a tmpfs translator that doesn't even access any device but
+ "just" memory shouldn't require any special privileges as well imho
+ <teythoon> so why is tmpfs not based on say libnetfs? b/c it is used for
+ d-i and someone (apt?) mmaps stuff?
+ <pinotree> being libdiskfs-based isn't much the issue, iirc
+ <pinotree> http://lists.gnu.org/archive/html/bug-hurd/2013-03/msg00014.html
+ too
+ <kilobug> teythoon: AFAIK apt uses mmap, yes
+ <braunr_> teythoon: right
+ <braunr_> a ramfs is actually tricky to implement well
+ <mcsim> braunr_: What do you mean under "to implement well"?
+ <braunr_> as efficiently as possible
+ <braunr_> i.e. being as close as possible to the page cache for minimum
+ overhead
+ <mcsim> braunr: AFAIK ramfs should not use swap partition, so page cache
+ shouldn't be relevant for it.
+ <braunr> i'm talking about a ramfs in general
+ <braunr> not the specific linux ramfs
+ <braunr> in linux, what they call ramfs is the tiny version of tmpfs that
+ doesn't use swap
+ <braunr> i actually don't like "tmpfs" much
+ <braunr> memfs may be more appropriate
+ <braunr> anyway
+ <mcsim> braunr: I see. And do you consider defpager variant as "close as
+ possible to the page cache"?
+ <braunr> not far at least
+ <braunr> if we were able to use it for memory obects, it would be nice
+ <braunr> but defpager only gets attached to objects when they're evicted
+ <braunr> before that, anonymous (or temporary, in mach terminology) objects
+ have no backing store
+ <braunr> this was probably designed without having tmpfs in mind
+ <braunr> i wonder if it's possible to create a memory object without a
+ backing store
+ <mcsim> what should happen to it if kernel decides to evict it?
+ <braunr> it sets the default pager as its backing store and pushes it out
+ <mcsim> that's how it works now, but you said "create a memory object
+ without a backing store"
+ <braunr> mach can do that
+ <braunr> i was wondering if we could do that too from userspace
+ <mcsim> mach does not evict such objects, unless it bound a defpager to
+ them
+ <mcsim> but how can you handle this in userspace?
+ <braunr> i mean, create a memory object with a null control port
+ <braunr> mcsim: is that clearer ?
+ <mcsim> suppose you create such object, how kernel will evict it if kernel
+ does not know who is responsible for eviction of this object?
+ <braunr> it does
+ <braunr> 16:41 < braunr> it sets the default pager as its backing store and
+ pushes it out
+ <braunr> that's how i intend to do it on x15 at least
+ <braunr> but it's much simpler there because uvm provides better separation
+ between anonymous and file memory
+ <braunr> whereas they're much too similar in mach vm
+ <mcsim> than what the difference between current situation, when you
+ explicitly invoke defpager to create object and implicit method you
+ propose?
+ <braunr> you don't need a true defpager unless you actually have swap
+ <mcsim> ok
+ <mcsim> now I see
+ <braunr> it also saves the communication overhead when initializing the
+ object
+ <mcsim> thank you
+ <braunr> which may be important since we use ramfs for speed mostly
+ <mcsim> agree
+ <braunr> it should also simplify the defpager implementation, since it
+ would only have a single client, the kernel
+ <braunr> which may also be important with regard to global design
+ <braunr> one thing which is in my opinion very wrong with mach is that it
+ may be a client
+ <braunr> a well designed distributed system should normally not allow on
+ component to act as both client and server toward another
+ <braunr> i.e. the kernel should only be a server, not a client
+ <braunr> and there should be a well designed server hierarchy to avoid
+ deadlocks
+ <braunr> (such as the one we had in libpager because of that)
+ <mcsim> And how about filesystem? It acts both as server and as client
+ <braunr> yes
+ <braunr> but not towards the same other component
+ <braunr> application -> file system -> kernel
+ <braunr> no "<->"
+ <braunr> the qnx documentation explains that quite well
+ <braunr> let me see if i can find the related description
+ <mcsim> Basically, I've got your point. And I would rather agree that
+ kernel should not act as client
+ <braunr> mcsim:
+ http://www.qnx.com/developers/docs/6.4.0/neutrino/sys_arch/ipc.html#Robust
+ <braunr> one way to implement that (and qnx does that too) is to make
+ pagers act as client only
+ <braunr> they sleep in the kernel, waiting for a reply
+ <braunr> and when the kernel needs to evict something, a reply is sent
+ <braunr> (qnx doesn't actually do that for paging, but it's a general idea)
+ <mcsim> braunr: how hierarchy of senders is enforced?
+ <braunr> it's not
+ <braunr> developers must take care
+ <braunr> same as locking, be careful about it
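+
+Regarding the mmap point near the start of this log, a hedged sketch
+(simplified from what glibc's `mmap` does on the Hurd; flag and error
+handling omitted): mapping a file means asking its translator for a memory
+object with `io_map` and handing that object to `vm_map`.  A tmpfs built on
+libdiskfs therefore has to provide such memory objects, which is what the
+defpager discussion is about.
+
+    /* Hedged sketch: read-only file mapping via io_map + vm_map.  */
+    #include <fcntl.h>
+    #include <hurd.h>
+    #include <hurd/io.h>
+    #include <mach.h>
+
+    void *
+    map_file_readonly (const char *path, vm_size_t len)
+    {
+      file_t file = file_name_lookup (path, O_READ, 0);
+      mach_port_t rdobj = MACH_PORT_NULL, wrobj = MACH_PORT_NULL;
+      vm_address_t addr = 0;
+
+      if (file == MACH_PORT_NULL
+          /* The translator must be able to hand out a memory object.  */
+          || io_map (file, &rdobj, &wrobj)
+          || vm_map (mach_task_self (), &addr, len, 0 /* mask */,
+                     1 /* anywhere */, rdobj, 0 /* offset */, 1 /* copy */,
+                     VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE))
+        addr = 0;
+
+      /* The kernel keeps its own reference on the mapped object.  */
+      if (rdobj != MACH_PORT_NULL)
+        mach_port_deallocate (mach_task_self (), rdobj);
+      if (wrobj != MACH_PORT_NULL)
+        mach_port_deallocate (mach_task_self (), wrobj);
+      if (file != MACH_PORT_NULL)
+        mach_port_deallocate (mach_task_self (), file);
+      return (void *) addr;
+    }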
diff --git a/hurd/translator/ufs.mdwn b/hurd/translator/ufs.mdwn
new file mode 100644
index 00000000..4d611e95
--- /dev/null
+++ b/hurd/translator/ufs.mdwn
@@ -0,0 +1,38 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+The `ufs` translator supports a variant of the Unix File System (UFS).
+Beware: we are not aware of anybody having used or tested it in ages, so it
+may well be very broken and eat your data.
+
+
+# IRC, freenode, #hurd, 2013-08-30
+
+[[!tag open_issue_hurd]]
+
+ <Arne`> There might be a copyright problem: <nalaginrut> well, there seems
+ BSD-4clauses in the code:
+ http://git.savannah.gnu.org/cgit/hurd/hurd.git/tree/ufs/alloc.c
+ <Arne`> braunr, tschwinge: Do you have any info on that? 4-clause BSD and
+ GPL on the same code are a license incompatibility…
+ <tschwinge> Arne`: I've put it onto my (long) TODO list.
+ <tschwinge> Easiest solution might be: rm -rf ufs.
+ <nalaginrut> will these affected code rewritten? or just modify license?
+ <mark_weaver> only the regents of the University of California could choose
+ to modify the license.
+ <youpi> nalaginrut: one can't modify a licence if one is not the author
+ <youpi> we can simply dump the code
+ <mark_weaver> s/author/owner/
+ <tschwinge> As I suppose ufs is unused/untested for a decade or so, I'd
+ have no issues with simply removing it from the tree, together with
+ ufs-fsck and ufs-utils.
+ <pinotree> tschwinge: or maybe extract the ufs stuff in an own repo, to be
+ imported as branch in incubator or own hurd/ufs.git?
+ <tschwinge> Sure, why not.
diff --git a/libpthread.mdwn b/libpthread.mdwn
index 801a1a79..0f7f28fe 100644
--- a/libpthread.mdwn
+++ b/libpthread.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2010, 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2010, 2012, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -59,6 +60,9 @@ even if the current number of threads is lower.
The same issue exists in [[hurd/libthreads]].
+The current implementation in libpthread is
+[[buggy|open_issues/libpthread/t/fix_have_kernel_resources]].
+
# Open Issues
diff --git a/microkernel/discussion.mdwn b/microkernel/discussion.mdwn
index a5a73e18..f5626f6c 100644
--- a/microkernel/discussion.mdwn
+++ b/microkernel/discussion.mdwn
@@ -10,7 +10,7 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_documentation]]
-IRC, freenode, #hurd, 2011-07-26:
+# IRC, freenode, #hurd, 2011-07-26
< antrik> Tekk_`: regarding microkernels: the basic idea, and really the
*only* fundamental difference, is that they isolate things in separate
@@ -22,3 +22,41 @@ IRC, freenode, #hurd, 2011-07-26:
these are secondary effects: such choices can also be implemented in a
monolithic architecture -- and not necessarily harder. just less obvious
in some cases...
+
+
+# IRC, freenode, #hurd, 2013-08-28
+
+ <Spyro> ok question
+ <Spyro> what is the big advantage of microkernels over monolithic kernels
+ as you guys see it?
+ <Spyro> is it entirely for the benefit of developers or are there actaully
+ practical advantages?
+ <kilobug> Spyro: there are many advantages, at least in theory, in terms of
+ modularity, flexibility, stability, scalability, security, ... which are
+ for everyone
+ <braunr> Spyro: of course some advantages are practical
+ <braunr> for me, the main advantage is system extensibility
+ <braunr> you can replace system services at runtime
+ <braunr> and on the hurd, you can do it as an unprivileged user
+ <braunr> (the direct side effect is far increased security)
+ <braunr> kilobug: i don't see the scalability advantages though
+ <kilobug> braunr: I would say it goes in par with the modularity, like, you
+ can have a full-weight IPv4/IPv6 stack for desktop, but a minimal stack
+ for embeded
+ <braunr> i see
+ <braunr> for me, it's in par with extensibility :)
+ <braunr> i see modularity only as an implementation of extensibility
+ <braunr> or a special case of it
+ <braunr> Spyro: basically, it's supposed to bring the same advantages as
+ fuse, but even more so (because it's not limited to file systems), and
+ better (because it's normally well integrated with the core of the
+ system)
+ <teythoon> also, fuse is kind of bolted on and Linux composes really badly
+ <teythoon> e.g. it is not possible to nfs export a fuse mounted filesystem
+ on Linux
+ <braunr> bolted ?
+ <teythoon> isn't that the term? as in being attached using screws?
+ <braunr> i'm not familiar with it :p
+ <azeem> "a posteriori design"
+ <teythoon> yes
+ <braunr> ok
diff --git a/microkernel/l4.mdwn b/microkernel/l4.mdwn
index de311497..ef39616b 100644
--- a/microkernel/l4.mdwn
+++ b/microkernel/l4.mdwn
@@ -1,5 +1,5 @@
-[[!meta copyright="Copyright © 2004, 2006, 2007, 2008, 2010, 2011, 2012 Free
-Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2004, 2006, 2007, 2008, 2010, 2011, 2012, 2013
+Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -30,6 +30,14 @@ is now stalled.
Genode and L4: http://www.youtube.com/user/drsartakov?feature=watch
+# IRC, freenode, #hurd, 2013-08-26
+
+ < Spyro> also
+ < Spyro> what's the basic difference between mach and L4?
+ < braunr> l4 is a nanokernel whereas mach is a hybrid with high level
+ messaging and virtual memory services
+
+
[[!ymlfront data="""
sel4:
diff --git a/microkernel/mach/concepts.mdwn b/microkernel/mach/concepts.mdwn
index 0f7cbf00..08bce3f5 100644
--- a/microkernel/mach/concepts.mdwn
+++ b/microkernel/mach/concepts.mdwn
@@ -31,3 +31,20 @@ text="*[[mach\_kernel\_principles|documentation]]*:
In particular the [[!toggle id=mach_kernel_principles
text="[mach\_kernel\_principles]"]] book further elaborates on Mach's concepts
and principles.
+
+
+# IRC, freenode, #hurd, 2013-08-26
+
+ < stargater> then is mach not more microkernel
+ < stargater> when it have driver inside
+ < braunr> mach is a hybrid
+ < braunr> even without drivers
+ < stargater> in www i read mach is microkernel
+ < stargater> not hybrid
+ < braunr> the word microkernel usually includes hybrids
+ < braunr> true microkernels are also called nanokernels
+ < braunr> the word isn't that important, what matters is that mach does
+ more in kernel than what the microkernel principle implies
+ < braunr> e.g. high level async IPC and high level virtual memory
+ operations
+ < braunr> including physical memory management
diff --git a/microkernel/mach/deficiencies.mdwn b/microkernel/mach/deficiencies.mdwn
index 1294b8b3..8f47f61f 100644
--- a/microkernel/mach/deficiencies.mdwn
+++ b/microkernel/mach/deficiencies.mdwn
@@ -260,9 +260,9 @@ License|/fdl]]."]]"""]]
solve a number of problems... I just wonder how many others it would open
-# IRC, freenode, #hurd, 2012-09-04
+# X15
-X15
+## IRC, freenode, #hurd, 2012-09-04
<braunr> it was intended as a mach clone, but now that i have better
knowledge of both mach and the hurd, i don't want to retain mach
@@ -767,3 +767,1620 @@ In context of [[open_issues/multithreading]] and later [[open_issues/select]].
<braunr> imo, a rewrite is more appropriate
<braunr> sometimes, things done in x15 can be ported to the hurd
<braunr> but it still requires a good deal of effort
+
+
+## IRC, freenode, #hurd, 2013-04-26
+
+ <bddebian> braunr: Did I see that you are back tinkering with X15?
+ <braunr> well yes i am
+ <braunr> and i'm very satisfied with it currently, i hope i can maintain
+ the same level of quality in the future
+ <braunr> it can already handle hundreds of processors with hundreds of GB
+ of RAM in a very scalable way
+ <braunr> most algorithms are O(1)
+ <braunr> even waking up multiple threads is O(1) :)
+ <braunr> i'd like to implement rcu this summer
+ <bddebian> Nice. When are you gonna replace gnumach? ;-P
+ <braunr> never
+ <braunr> it's x15, not x15mach now
+ <braunr> it's not meant to be compatible
+ <bddebian> Who says it has to be compatible? :)
+ <braunr> i don't know, my head
+ <braunr> the point is, the project is about rewriting the hurd now, not
+ just the kernel
+ <braunr> new kernel, new ipc, new interfaces, new libraries, new everything
+ <bddebian> Yikes, now that is some work. :)
+ <braunr> well yes and no
+ <braunr> ipc shouldn't be that difficult/long, considering how simple i
+ want the interface to be
+ <bddebian> Cool.
+ <braunr> networking and drivers will simply be reused from another code
+ base like dde or netbsd
+ <braunr> so besides the kernel, it's a few libraries (e.g. a libports like
+ library), sysdeps parts in the c library, and a file system
+ <bddebian> For inclusion in glibc or are you not intending on using glibc?
+ <braunr> i intend to use glibc, but not for upstream integration, if that's
+ what you meant
+ <braunr> so a private, local branch i assume
+ <braunr> i expect that part to be the hardest
+
+
+## IRC, freenode, #hurd, 2013-05-02
+
+ <zacts> braunr: also, will propel/x15 use netbsd drivers or netdde linux
+ drivers?
+ <zacts> or both?
+ <braunr> probably netbsd drivers
+ <zacts> and if netbsd, will it utilize rump?
+ <braunr> i don't know yet
+ <zacts> ok
+ <braunr> device drivers and networking will arrive late
+ <braunr> the system first has to run in ram, with a truely configurable
+ boot process
+ <braunr> (i.e. a boot process that doesn't use anything static, and can
+ boot from either disk or network)
+ <braunr> rump looks good but it still requires some work since it doesn't
+ take care of messaging as well as we'd want
+ <braunr> e.g. signal relaying isn't that great
+ <zacts> I personally feel like using linux drivers would be cool, just
+ because linux supports more hardware than netbsd iirc..
+ <mcsim> zacts: But it could be problematic as you should take quite a lot
+ code from linux kernel to add support even for a single driver.
+ <braunr> zacts: netbsd drivers are far more portable
+ <zacts> oh wow, interesting. yeah I did have the idea that netbsd would be
+ more portable.
+ <braunr> mcsim: that doesn't seem to be as big a problem as you might
+ suggest
+ <braunr> the problem is providing the drivers with their requirements
+ <braunr> there are a lot of different execution contexts in linux (hardirq,
+ softirq, bh, threads to name a few)
+ <braunr> being portable (as implied in netbsd) also means being less
+ demanding on the execution context
+ <braunr> which allows reusing code in userspace more easily, as
+ demonstrated by rump
+ <braunr> i don't really care about extensive hardware support, since this
+ is required only for very popular projects such as linux
+ <braunr> and hardware support actually comes with popularity (the driver
+ code base is related with the user base)
+ <zacts> so you think that more users will contribute if the projects takes
+ off?
+ <braunr> i care about clean and maintainable code
+ <braunr> well yes
+ <zacts> I think that's a good attitude
+ <braunr> what i mean is, there is no need for extensive hardware support
+ <mcsim> braunr: TBH, I did not really got idea of rump. Do they try to run
+ the whole kernel or some chosen subsystems as user tasks?
+ <braunr> mcsim: some subsystems
+ <braunr> well
+ <braunr> all the subsystems required by the code they actually want to run
+ <braunr> (be it a file system or a network stack)
+ <mcsim> braunr: What's the difference with dde?
+ <braunr> it's not kernel oriented
+ <mcsim> what do you mean?
+ <braunr> it's not only meant to run on top of a microkernel
+ <braunr> as the author named it, it's "anykernel"
+ <braunr> if you remember at fosdem, he run code inside a browser
+ <braunr> ran*
+ <braunr> and also, netbsd drivers wouldn't restrict the license
+ <braunr> although not a priority, having a (would be) gnu system under
+ gplv3+ would be nice
+ <zacts> that would be cool
+ <zacts> x15 is already gplv3+
+ <zacts> iirc
+ <braunr> yes
+ <zacts> cool
+ <zacts> yeah, I would agree netbsd drivers do look more attractive in that
+ case
+ <braunr> again, that's clearly not the main reason for choosing them
+ <zacts> ok
+ <braunr> it could also cause other problems, such as accepting a bsd
+ license when contributing back
+ <braunr> but the main feature of the hurd isn't drivers, and what we want
+ to protect with the gpl is the main features
+ <zacts> I see
+ <braunr> drivers, as well as networking, would be third party code, the
+ same way you run e.g. firefox on linux
+ <braunr> with just a bit of glue
+ <zacts> braunr: what do you think of the idea of being able to do updates
+ for propel without rebooting the machine? would that be possible down the
+ road?
+ <braunr> simple answer: no
+ <braunr> that would probably require persistence, and i really don't want
+ that
+ <zacts> does persistence add a lot of complexity to the system?
+ <braunr> not with the code, but at execution, yes
+ <zacts> interesting
+ <braunr> we could add per-program serialization that would allow it but
+ that's clearly not a priority for me
+ <braunr> updating with a reboot is already complex enough :)
+
+
+## IRC, freenode, #hurd, 2013-05-09
+
+ <braunr> the thing is, i consider the basic building blocks of the hurd too
+ crappy to build anything really worth such effort over them
+ <braunr> mach is crappy, mig is crappy, signal handling is crappy, hurd
+ libraries are ok but incur a lot of contention, which is crappy today
+ <bddebian> Understood but it is all we have currently.
+ <braunr> i know
+ <braunr> and it's good as a prototype
+ <bddebian> We have already had L4, viengoos, etc and nothing has ever come
+ to fruition. :(
+ <braunr> my approach is compeltely different
+ <braunr> it's not a new design
+ <braunr> a few things like ipc and signals are redesigned, but that's minor
+ compared to what was intended for hurdng
+ <braunr> propel is simply meant to be a fast, scalable implementation of
+ the hurd high level architecture
+ <braunr> bddebian: imagine a mig you don't fear using
+ <braunr> imagine interfaces not constrained to 100 calls ...
+ <braunr> imagine per-thread signalling from the start
+ <bddebian> braunr: I am with you 100% but it's vaporware so far.. ;-)
+ <braunr> bddebian: i'm just explaining why i don't want to work on large
+ scale projects on the hurd
+ <braunr> fixing local bugs is fine
+ <braunr> fixing paging is mandatory
+ <braunr> usb could be implemented with dde, perhaps by sharing the pci
+ handling code
+ <braunr> (i.e. have one big dde server with drivers inside, a bit ugly but
+ straightforward compared to a full fledged pci server)
+ <bddebian> braunr: But this is the problem I see. Those of you that have
+ the skills don't have the time or energy to put into fixing that kind of
+ stuff.
+ <bddebian> braunr: That was my thought.
+ <braunr> bddebian: well i have time, and i'm currently working :p
+ <braunr> but not on that
+ <braunr> bddebian: also, it won't be vaporware for long, i may have ipc
+ working well by the end of the year, and optimized and developer-friendly
+ by next year)
+
+
+## IRC, freenode, #hurd, 2013-06-05
+
+ <braunr> i'll soon add my radix tree with support for lockless lookups :>
+ <braunr> a tree organized based on the values of the keys thmselves, and
+ not how they relatively compare to each other
+ <braunr> also, a tree of arrays, which takes advantage of cache locality
+ without the burden of expensive resizes
+ <arnuld> you seem to be applying good algorithmic teghniques
+ <arnuld> that is nice
+ <braunr> that's one goal of the project
+ <braunr> you can't achieve performance and scalability without the
+ appropriate techniques
+ <braunr> see http://git.sceen.net/rbraun/librbraun.git/blob/HEAD:/rdxtree.c
+ for the existing userspace implementation
+ <arnuld> in kern/work.c I see one TODO "allocate numeric IDs to better
+ identify worker threads"
+ <braunr> yes
+ <braunr> and i'm adding my radix tree now exactly for that
+ <braunr> (well not only, since radix tree will also back VM objects and IPC
+ spaces, two major data structures of the kernel)
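+
+As a rough illustration of the "tree of arrays" idea (a toy sketch, nothing
+like the real rdxtree.c linked above, and without the lockless-lookup
+machinery): each node is a small array indexed directly by a slice of the
+key's bits, so lookup and insertion are a fixed number of array indexations
+for a bounded key width.
+
+    /* Toy fixed-height radix tree for 32-bit keys, 64-way nodes.  */
+    #include <stdlib.h>
+
+    #define FANOUT_BITS 6
+    #define FANOUT      (1 << FANOUT_BITS)
+    #define HEIGHT      ((32 + FANOUT_BITS - 1) / FANOUT_BITS)
+
+    struct node { void *slots[FANOUT]; };
+
+    static void *
+    tree_lookup (struct node *root, unsigned int key)
+    {
+      struct node *node = root;
+
+      for (int level = HEIGHT - 1; node != NULL && level > 0; level--)
+        node = node->slots[(key >> (level * FANOUT_BITS)) & (FANOUT - 1)];
+
+      return node ? node->slots[key & (FANOUT - 1)] : NULL;
+    }
+
+    static int
+    tree_insert (struct node **rootp, unsigned int key, void *value)
+    {
+      if (*rootp == NULL && (*rootp = calloc (1, sizeof **rootp)) == NULL)
+        return -1;
+
+      struct node *node = *rootp;
+
+      for (int level = HEIGHT - 1; level > 0; level--)
+        {
+          void **slot
+            = &node->slots[(key >> (level * FANOUT_BITS)) & (FANOUT - 1)];
+          if (*slot == NULL
+              && (*slot = calloc (1, sizeof (struct node))) == NULL)
+            return -1;
+          node = *slot;
+        }
+
+      node->slots[key & (FANOUT - 1)] = value;
+      return 0;
+    }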
+
+
+## IRC, freenode, #hurd, 2013-06-11
+
+ <braunr> and also starting paging anonymous memory in x15 :>
+ <braunr> well, i've merged my radix tree code, made it safe for lockless
+ access (or so i hope), added generic concurrent work queues
+ <braunr> and once the basic support for anonymous memory is done, x15 will
+ be able to load modules passed from grub into userspace :>
+ <braunr> but i've also been thinking about how to solve a major scalability
+ issue with capability based microkernels that noone else seem to have
+ seen or bothered thinking about
+ <braunr> for those interested, the problem is contention at the port level
+ <braunr> unlike on a monolithic kernel, or a microkernel with thread-based
+ ipc such as l4, mach and similar kernels use capabilities (port rights in
+ mach terminology) to communicate
+ <braunr> the kernel then has to "translate" that reference into a thread to
+ process the request
+ <braunr> this is done by using a port set, putting many ports inside, and
+ making worker threads receive messages on the port set
+ <braunr> and in practice, this gets very similar to a traditional thread
+ pool model
+ <braunr> one thread actually waits for a message, while others sit on a
+ list
+ <braunr> when a message arrives, the receiving thread wakes another from
+ that list so it receives the next message
+ <braunr> this is all done with a lock
+ <bddebian> Maybe they thought about it but couldn't or were to lazy to find
+ a better way? :)
+ <mcsim> braunr: what do you mean under "unlike .... a microkernel with
+ thread-based ipc such as l4, mach and similar kernels use capabilities"?
+ L4 also has capabilities.
+ <braunr> mcsim: not directly
+ <braunr> capabilities are implemented by a server on top of l4
+ <braunr> unless it's OKL4 or another variant with capabilities back in the
+ kernel
+ <braunr> i don't know how fiasco does it
+ <braunr> so the problem with this lock is potentially very heavy contention
+ <braunr> and contention in what is the equivalent of a system call ..
+ <braunr> it's also hard to make it real-time capable
+ <braunr> for example, in qnx, they temporarily apply priority inheritance
+ to *every* server thread since they don't know which one is going to be
+ receiving next
+ <mcsim> braunr: in fiasco you have capability pool for each thread and this
+ pool is stored in tread control block. When one allocates capability
+ kernel just marks slot in a pool as busy
+ <braunr> mcsim: ok but, there *is* a thread for each capability
+ <braunr> i mean, when doing ipc, there can only be one thread receiving the
+ message
+ <braunr> (iirc, this was one of the big issue for l4-hurd)
+ <mcsim> ok. i see the difference.
+ <braunr> well i'm asking
+ <braunr> i'm not so sure about fiasco
+ <braunr> but that's what i remember from the generic l4 spec
+ <mcsim> sorry, but where is the question?
+ <braunr> 16:04 < braunr> i mean, when doing ipc, there can only be one
+ thread receiving the message
+ <mcsim> yes, you specify capability to thread you want to send message to
+ <braunr> i'll rephrase:
+ <braunr> when you send a message, do you invoke a capability (as in mach),
+ or do you specify the receiving thread ?
+ <mcsim> you specify a thread
+ <braunr> that's my point
+ <mcsim> but you use local name (that is basically capability)
+ <braunr> i see
+ <braunr> from wikipedia: "Furthermore, Fiasco contains mechanisms for
+ controlling communication rights as well as kernel-level resource
+ consumption"
+ <braunr> not certain that's what it refers to, but that's what i understand
+ from it
+ <braunr> more capability features in the kernel
+ <braunr> but you still send to one thread
+ <mcsim> yes
+ <braunr> that's what makes it "easily" real time capable
+ <braunr> a microkernel that would provide mach-like semantics
+ (object-oriented messaging) but without contention at the messsage
+ passing level (and with resource preallocation for real time) would be
+ really great
+ <braunr> bddebian: i'm not sure anyone did
+ <bddebian> braunr: Well you can be the hero!! ;)
+ <braunr> the various papers i could find that were close to this subject
+ didn't take contention into account
+ <braunr> exception for network-distributed ipc on slow network links
+ <braunr> bddebian: eh
+ <braunr> well i think it's doable acctually
+ <mcsim> braunr: can you elaborate on where contention is, because I do not
+ see this clearly?
+ <braunr> mcsim: let's take a practical example
+ <braunr> a file system such as ext2fs, that you know well enough
+ <braunr> imagine a large machine with e.g. 64 processors
+ <braunr> and an ignorant developer like ourselves issuing make -j64
+ <braunr> every file access performed by the gcc tools will look up files,
+ and read/write/close them, concurrently
+ <braunr> at the server side, thread creation isn't a problem
+ <braunr> we could have as many threads as clients
+ <braunr> the problem is the port set
+ <braunr> for each port class/bucket (let's assume they map 1:1), a port set
+ is created, and all receive rights for the objects managed by the server
+ (the files) are inserted in this port set
+ <braunr> then, the server uses ports_manage_port_operations_multithread()
+ to service requests on that port set
+ <braunr> with as many threads required to process incoming messages, much
+ the same way a work queue does it
+ <braunr> but you can't have *all* threads receiving at the same time
+ <braunr> there can only be one
+ <braunr> the others are queued
+ <braunr> i did a change about the queue order a few months ago in mach btw
+ <braunr> mcsim: see ipc/ipc_thread.c in gnumach
+ <braunr> this queue is shared and must be modified, which basically means a
+ lock, and contention
+ <braunr> so the 64 concurrent gcc processes will suffer from contenion at
+ the server while they're doing something similar to a system call
+ <braunr> by that, i mean, even before the request is received
+ <braunr> mcsim: if you still don't understand, feel free to ask
+ <mcsim> braunr: I'm thinking on it :) give me some time
+ <braunr> "Fiasco.OC is a third generation microkernel, which evolved from
+ its predecessor L4/Fiasco. Fiasco.OC is capability based"
+ <braunr> ok
+ <braunr> so basically, there are no more interesting l4 variants strictly
+ following the l4v2 spec any more
+ <braunr> "The completely redesigned user-land environment running on top of
+ Fiasco.OC is called L4 Runtime Environment (L4Re). It provides the
+ framework to build multi-component systems, including a client/server
+ communication framework"
+ <braunr> so yes, client/server communication is built on top of the kernel
+ <braunr> something i really want to avoid actually
+ <mcsim> So when 1 core wants to pull something out of queue it has to lock
+ it, and the problem arrives when other 63 cpus are waiting in the same
+ lock. Right?
+ <braunr> mcsim: yes
+ <mcsim> could this be solved by implementing per cpu queues? Like in slab
+ allocator
+ <braunr> solved, no
+ <braunr> reduced, yes
+ <braunr> by using multiple port sets, each with their own thread pool
+ <braunr> but this would still leave core problems unsolved
+ <braunr> (those making real-time hard)
+ <mcsim> to make it real-time is not really essential to solve this problem
+ <braunr> that's the other way around
+ <mcsim> we just need to guarantee that locking protocol is fair
+ <braunr> solving this problem is required for quality real-time
+ <braunr> what you refer to is similar to what i described in qnx earlier
+ <braunr> it's ugly
+ <braunr> keep in mind that message passing is the equivalent of system
+ calls on monolithic kernels
+ <braunr> os ideally, we'd want something as close as possible to an
+ actually system call
+ <braunr> so*
+ <braunr> mcsim: do you see why it's ugly ?
+ <mcsim> no i meant exactly opposite, I meant to use some deterministic
+ locking protocol
+ <braunr> please elaborate
+ <braunr> because what qnx does is deterministic
+ <mcsim> We know in what sequences threads will acquire the lock, so we will
+ not have to apply inheritance to all threads
+ <braunr> hwo do you know ?
+ <mcsim> there are different approaches, like you use ticket system or MCS
+ lock (http://portal.acm.org/citation.cfm?id=103729)
+ <braunr> that's still locking
+ <braunr> a system call has 0 contention
+ <braunr> 0 potential contention
+ <mcsim> in linux?
+ <braunr> everywhere i assume
+ <mcsim> than why do they need locks?
+ <braunr> they need locks after the system call
+ <braunr> the system call itself is a stupid trap that makes the thread
+ "jump" in the kernel
+ <braunr> and the reason why it's so simple is the same as in fiasco:
+ threads (clients) communicate directly with the "server thread"
+ (themselves in kernel mode)
+ <braunr> so 1/ they don't go through a capability or any other abstraction
+ <braunr> and 2/ they're even faster than on fiasco because they don't need
+ to find the destination, it's implied by the trap mechanism)
+ <braunr> 2/ is only an optimization that we can live without
+ <braunr> but 1/ is a serious bottleneck for microkernels
+ <mcsim> Do you mean that there system call that process without locks or do
+ you mean that there are no system calls that use locks?
+ <braunr> this is what makes papers such as
+ https://www.kernel.org/doc/ols/2007/ols2007v1-pages-251-262.pdf valid
+ <braunr> i mean the system call (the mechanism used to query system
+ services) doesn't have to grab any lock
+ <braunr> the idea i have is to make the kernel transparently (well, as much
+ as it can be) associate a server thread to a client thread at the port
+ level
+ <braunr> at the server side, it would work practically the same
+ <braunr> the first time a server thread services a request, it's
+ automatically associated to a client, and subsequent request will
+ directly address this thread
+ <braunr> when the client is destroyed, the server gets notified and
+ destroys the associated server trhead
+ <braunr> for real-time tasks, i'm thinking of using a signal that gets sent
+ to all servers, notifying them of the thread creation so that they can
+ preallocate the server thread
+ <braunr> or rather, a signal to all servers wishing to be notified
+ <braunr> or perhaps the client has to reserve the resources itself
+ <braunr> i don't know, but that's the idea
+ <mcsim> and who will send this signal?
+ <braunr> the kernel
+ <braunr> x15 will provide unix like signals
+ <braunr> but i think the client doing explicit reservation is better
+ <braunr> more complicated, but better
+ <braunr> real time developers ought to know what they're doing anyway
+ <braunr> mcsim: the trick is using lockless synchronization (like rcu) at
+ the port so that looking up the matching server thread doesn't grab any
+ lock
+ <braunr> there would still be contention for the very first access, but
+ that looks much better than having it every time
+ <braunr> (potential contention)
+ <braunr> it also simplifies writing servers a lot, because it encourages
+ the use of a single port set for best performance
+ <braunr> instead of burdening the server writer with avoiding contention
+ with e.g. a hierarchical scheme
+ <mcsim> "looking up the matching server" -- looking up where?
+ <braunr> in the port
+ <mcsim> but why can't you just take first?
+ <braunr> that's what triggers contention
+ <braunr> you have to look at the first
+ <mcsim> > (16:34:13) braunr: mcsim: do you see why it's ugly ?
+ <mcsim> BTW, not really
+ <braunr> imagine serveral clients send concurrently
+ <braunr> mcsim: well, qnx doesn't do it every time
+ <braunr> qnx boosts server threads only when there are no thread currently
+ receiving, and a sender with a higher priority arrives
+ <braunr> since qnx can't know which server thread is going to be receiving
+ next, it boosts every thread
+ <braunr> boosting priority is expensive, and boosting everythread is linear
+ with the number of threads
+ <braunr> so on a big system, it would be damn slow for a system call :)
+ <mcsim> ok
+ <braunr> and grabbing "the first" can't be properly done without
+ serialization
+ <braunr> if several clients send concurrently, only one of them gets
+ serviced by the "first server thread"
+ <braunr> the second client will be serviced by the "second" (or the first
+ if it came back)
+ <braunr> making the second become the first (i call it the manager) must be
+ atomic
+ <braunr> that's the core of the problem
+ <braunr> i think it's very important because that's currently one of the
+ fundamental differences wih monolithic kernels
+ <mcsim> so looking up for server is done without contention. And just
+ assigning task to server requires lock, right?
+ <braunr> mcsim: basically yes
+ <braunr> i'm not sure it's that easy in practice but that's what i'll aim
+ at
+ <braunr> almost every argument i've read about microkernel vs monolithic is
+ full of crap
+ <mcsim> Do you mean lock on the whole queue or finer grained one?
+ <braunr> the whole port
+ <braunr> (including the queue)
+ <mcsim> why the whole port?
+ <braunr> how can you make it finer ?
+ <mcsim> is queue a linked list?
+ <braunr> yes
+ <mcsim> than can we just lock current element in the queue and elements
+ that point to current
+ <braunr> that's two lock
+ <braunr> and every sender will want "current"
+ <braunr> which then becomes coarse grained
+ <mcsim> but they want different current
+ <braunr> let's call them the manager and the spare threads
+ <braunr> yes, that's why there is a lock
+ <braunr> so they don't all get the same
+ <braunr> the manager is the one currently waiting for a message, while
+ spare threads are available but not doing anything
+ <braunr> when the manager finally receives a message, it takes the first
+ spare, which becomes the new manager
+ <braunr> exactly like in a common thread pool
+ <braunr> so what are you calling current ?
+ <mcsim> we have in a port queue of threads that wait for message: t1 -> t2
+ -> t3 -> t4; kernel decided to assign message to t3, than t3 and t2 are
+ locked.
+ <braunr> why not t1 and t2 ?
+ <mcsim> i was calling t3 in this example as current
+ <mcsim> some heuristics
+ <braunr> yeah well no
+ <braunr> it wouldn't be deterministic then
+ <mcsim> for instance client runs on core 3 and wants server that also runs
+ on core 3
+ <braunr> i really want the operation as close as a true system call as
+ possible, so O(1)
+ <braunr> what if there are none ?
+ <mcsim> it looks up forward up to the end of queue: t1->t2->t4; takes t4
+ <mcsim> than it starts from the beginning
+ <braunr> that becomes linear in the worst case
+ <mcsim> no
+ <braunr> so 4095 attempts on a 4096 cpus machine
+ <braunr> ?
+ <mcsim> you're right
+ <braunr> unfortunately :/
+ <braunr> a per-cpu scheme could be good
+ <braunr> and applicable
+ <braunr> with much more thought
+ <braunr> and the problem is that, unlike the kernel, which is naturally a
+ one thread per cpu server, userspace servers may have less or more
+ threads than cpu
+ <braunr> possibly unbalanced too
+ <braunr> so it would result in complicated code
+ <braunr> one good thing with microkernels is that they're small
+ <braunr> they don't pollute the instruction cache much
+ <braunr> keeping the code small is important for performance too
+ <braunr> so forgetting this kind of optimization makes for not too
+ complicated code, and we rely on the scheduler to properly balance
+ threads
+ <braunr> mcsim: also note that, with your idea, the worst cast is twice
+ more expensive than a single lock
+ <braunr> and on a machine with few processors, this worst case would be
+ likely
+ <mcsim> so, you propose every time try to take first server from the queue?
+ <mcsim> braunr: ^
+ <braunr> no
+ <braunr> that's what is done already
+ <braunr> i propose doing that the first time a client sends a message
+ <braunr> but then, the server thread that replied becomes strongly
+ associated to that client (it cannot service requests from other clients)
+ <braunr> and it can be recycled only when the client dies
+ <braunr> (which generates a signal indicating the server it can now recycle
+ the server thread)
+ <braunr> (a signal similar to the no-sender or dead-name notifications in
+ mach)
+ <braunr> that signal would be sent from the kernel, in the traditional unix
+ way (i.e. no dedicated signal thread since it would be another source of
+ contention)
+ <braunr> and the server thread would directly receive it, not interfering
+ with the other threads in the server in any way
+ <braunr> => contention on first message only
+ <braunr> now, for something like make -j64, which starts a different
+ process for each compilation (itself starting subprocesses for
+ preprocessing/compiling/assembling)
+ <braunr> it wouldn't be such a big win
+ <braunr> so even this first access should be optimized
+ <braunr> if you ever get an idea, feel free to share :)
+ <mcsim> May mach block thread when it performs asynchronous call?
+ <mcsim> braunr: ^
+ <braunr> sure
+ <braunr> but that's unrelated
+ <braunr> in mach, a sender is blocked only when the message queue is full
+ <mcsim> So we can introduce per cpu queues at the sender side
+ <braunr> (and mach_msg wasn't called in non blocking mode obviously)
+ <braunr> no
+ <braunr> they need to be delivered in order
+ <mcsim> In what order?
+ <braunr> messages can't be reorder once queued
+ <braunr> reordered
+ <braunr> so fifo order
+ <braunr> if you break the queue in per cpu queues, you may break that, or
+ need work to rebuild the order
+ <braunr> which negates the gain from using per cpu queues
+ <mcsim> Messages from the same thread will be kept in order
+ <braunr> are you sure ?
+ <braunr> and i'm not sure it's enough
+ <mcsim> thes cpu queues will be put to common queue once context switch
+ occurs
+ <braunr> *all* messages must be received in order
+ <mcsim> these*
+ <braunr> uh ?
+ <braunr> you want each context switch to grab a global lock ?
+ <mcsim> if you have parallel threads that send messages that do not have
+ dependencies than they are unordered
+ <mcsim> always
+ <braunr> the problem is they might
+ <braunr> consider auth for example
+ <braunr> you have one client attempting to authenticate itself to a server
+ through the auth server
+ <braunr> if message order is messed up, it just won't work
+ <braunr> but i don't have this problem in x15, since all ipc (except
+ signals) is synchronous
+ <mcsim> but it won't be messed up. You just "send" messages in O(1), but
+ than you put these messages that are not actually sent in queue all at
+ once
+ <braunr> i think i need more details please
+ <mcsim> you have lock on the port as it works now, not the kernel lock
+ <mcsim> the idea is to batch these calls
+ <braunr> i see
+ <braunr> batching can be effective, but it would really require queueing
+ <braunr> x15 only queues clients when there is no receiver
+ <braunr> i don't think batching can be applied there
+ <mcsim> you batch messages only from one client
+ <braunr> that's what i'm saying
+ <mcsim> so client can send several messages during his time slice and than
+ you put them into queue all together
+ <braunr> x15 ipc is synchronous, no more than 1 message per client at any
+ time
+ <braunr> there also are other problems with this strategy
+ <braunr> problems we have on the hurd, such as priority handling
+ <braunr> if you delay the reception of messages, you also delay priority
+ inheritance to the server thread
+ <braunr> well not the reception, the queueing actually
+ <braunr> but since batching is about delaying that, it's the same
+ <mcsim> if you use synchronous ipc then there is no sense in batching, at
+ least as I see it.
+ <braunr> yes
+ <braunr> 18:08 < braunr> i don't think batching can be applied there
+ <braunr> and i think sync ipc is the only way to go for a system intended
+ to provide messaging performance as close as possible to the system call
+ <mcsim> do you have as many server thread as many cores you have?
+ <braunr> no
+ <braunr> as many server threads as clients
+ <braunr> which matches the monolithic model
+ <mcsim> in current implementation?
+ <braunr> no
+ <braunr> currently i don't have userspace :>
+ <mcsim> and what is in hurd atm?
+ <mcsim> in gnumach
+ <braunr> asyn ipc
+ <braunr> async
+ <braunr> with message queues
+ <braunr> no priority inheritance, simple "handoff" on message delivery,
+ that's all
+ <anatoly> I managed to read the conversation :-)
+ <braunr> eh
+ <braunr> anatoly: any opinion on this ?
+ <anatoly> braunr: I have no opinion. I understand it partially :-) But
+ association of threads sounds to me like a good idea
+ <anatoly> But who am I to say what is good or what is not in that area :-)
+ <braunr> there still is this "first time" issue which needs at least one
+ atomic instruction
+ <anatoly> I see. Does mach do this "first time" thing every time?
+ <braunr> yes
+ <braunr> but gnumach is uniprocessor so it doesn't matter
+ <mcsim> if we have 1:1 relation for client and server threads we need only
+ per-cpu queues
+ <braunr> mcsim: explain that please
+ <braunr> and the problem here is establishing this relation
+ <braunr> with a lockless lookup, i don't even need per cpu queues
+ <mcsim> you said: (18:11:16) braunr: as many server threads as clients
+ <mcsim> how do you create server threads?
+ <braunr> pthread_create
+ <braunr> :)
+ <mcsim> ok :)
+ <mcsim> why and when do you create a server thread?
+ <braunr> there must be at least one unbound thread waiting for a message
+ <braunr> when a message is received, that thread knows it's now bound with
+ a client, and if needed wakes up/spawns another thread to wait for
+ incoming messages
+ <braunr> when it gets a signal indicating the death of the client, it knows
+ it's now unbound, and goes back to waiting for new messages
+ <braunr> becoming either the manager or a spare thread if there already is
+ a manager
+ <braunr> a timer could be used as it's done on the hurd to make unbound
+ threads die after a timeout
+ <braunr> the distinction between the manager and spare threads would only
+ be done at the kernel level
+ <braunr> the server would simply make unbound threads wait on the port set
+ <anatoly> How does a client send a signal to a thread about its death (as I
+ understand a signal is not a message) (sorry for noob question)
+ <mcsim> in what you described there are no queues at all
+ <braunr> anatoly: the kernel does it
+ <braunr> mcsim: there is, in the kernel
+ <braunr> the queue of spare threads
+ <braunr> anatoly: don't apologize for noob questions eh
+ <anatoly> braunr: is that client a thread of some user space task?
+ <braunr> i don't think it's a newbie topic at all
+ <braunr> anatoly: a thread
+ <mcsim> make these queue per cpu
+ <braunr> why ?
+ <braunr> there can be a lot less spare threads than processors
+ <braunr> i don't think it's a good idea to spawn one thread per cpu per
+ port set
+ <braunr> on a large machine you'd have tons of useless threads
+ <mcsim> if you have many useless threads, then assign 1 thread to several
+ cores, thus you will have half as many threads
+ <mcsim> i mean dynamically
+ <braunr> that becomes a hierarchical model
+ <braunr> it does reduce contention, but it's complicated, and for now i'm
+ not sure it's worth it
+ <braunr> it could be a tunable though
+ <mcsim> if you want something fast you should use something complicated.
+ <braunr> really ?
+ <braunr> a system call is very simple and very fast
+ <braunr> :p
+ <mcsim> why is it fast?
+ <mcsim> you still have a lot of threads in kernel
+ <braunr> but they don't interact during the system call
+ <braunr> the system call itself is usually a simple instruction with most
+ of it handled in hardware
+ <mcsim> if you invoke "write" system call, what do you do in kernel?
+ <braunr> you look up the function address in a table
+ <mcsim> you still have queues
+ <braunr> no
+ <braunr> sorry wait
+ <braunr> by system call, i mean "the transition from userspace to kernel
+ space"
+ <braunr> and the return
+ <braunr> not the service itself
+ <braunr> the equivalent on a microkernel system is sending a message from a
+ client, and receiving it in a server, not processing the request
+ <braunr> ideally, that's what l4 does: switching from one thread to
+ another, as simply and quickly as the hardware can
+ <braunr> so just a context and address space switch
+ <mcsim> at some point you put something in queue even in monolithic kernel
+ and make request to some other kernel thread
+ <braunr> the problem here is the indirection that is the capability
+ <braunr> yes but that's the service
+ <braunr> i don't care about the service here
+ <braunr> i care about how the request reaches the server
+ <mcsim> this division exist for microkernels
+ <mcsim> for monolithic it's all mixed
+ <anatoly> What does a thread do when it receives a message?
+ <braunr> anatoly: what it wants :p
+ <braunr> the service
+ <braunr> mcsim: ?
+ <braunr> mixed ?
+ <anatoly> braunr: hm, is it a thread of some server?
+ <mcsim> if you have several working threads in monolithic kernel you have
+ to put request in queue
+ <braunr> anatoly: yes
+ <braunr> mcsim: why would you have working threads ?
+ <mcsim> and there is no difference either you consider it as service or
+ just "transition from userspace to kernel space"
+ <braunr> i mean, it's a good thing to have, they usually do, but they're
+ not implied
+ <braunr> they're completely irrelevant to the discussion here
+ <braunr> of course there is
+ <braunr> you might very well perform system calls that don't involve
+ anything shared
+ <mcsim> you can also have only one working thread in microkernel
+ <braunr> yes
+ <mcsim> and all clients will wait for it
+ <braunr> you're mixing up work queues in the discussion here
+ <braunr> server threads are very similar to a work queue, yes
+ <mcsim> but you gave me an example with 64 cores and each core runs some
+ server thread
+ <braunr> they're a thread pool handling requests
+ <mcsim> you can have only one thread in a pool
+ <braunr> they have to exist in a microkernel system to provide concurrency
+ <braunr> monolithic kernels can process concurrently without them though
+ <mcsim> why?
+ <braunr> because on a monolithic system, _every client thread is its own
+ server_
+ <braunr> a thread making a system call is exactly like a client requesting
+ a service
+ <braunr> on a monolithic kernel, the server is the kernel
+ <braunr> and it *already* has as many threads as clients
+ <braunr> and that's pretty much the only thing beautiful about monolithic
+ kernels
+ <mcsim> right
+ <mcsim> have to think about it :)
+ <braunr> that's why they scale so easily compared to microkernel based
+ systems
+ <braunr> and why l4 people chose to have thread-based ipc
+ <braunr> but this just moves the problems to an upper level
+ <braunr> and is probably why they've realized one of the real values of
+ microkernel systems is capabilities
+ <braunr> and if you want to make them fast enough, they should be handled
+ directly by the kernel
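+
+The scheme described above (a server thread becomes bound to a client at the
+client's first message, serves only that client, and is recycled when the
+kernel signals the client's death) could look roughly like the following
+server-side loop. This is only a sketch in C; every primitive used here
+(port_set_recv, client_recv, spawn_spare_thread, ...) is a hypothetical name,
+not an actual x15 or Hurd interface.
+
+    #include <stddef.h>
+
+    /* Sketch of the "bind at first message" server loop; all names are
+       hypothetical and error handling is omitted. */
+
+    struct request { int client_id; /* ... payload ... */ };
+
+    struct request *port_set_recv(void);        /* wait on the port set */
+    struct request *client_recv(int client_id); /* NULL once the client died */
+    void spawn_spare_thread(void);              /* keep a receiver around */
+    void service(struct request *req);
+
+    void server_thread(void)
+    {
+        for (;;) {
+            /* Unbound: compete (once) for the first message of some client. */
+            struct request *req = port_set_recv();
+            int client = req->client_id;
+
+            /* Bound: make sure another unbound thread keeps receiving, then
+             * serve this client exclusively, without touching any shared
+             * queue, i.e. without contention. */
+            spawn_spare_thread();
+            service(req);
+
+            while ((req = client_recv(client)) != NULL)
+                service(req);
+
+            /* The client died: this thread is unbound again and is recycled
+             * by looping back to the shared port set. */
+        }
+    }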
+
+
+## IRC, freenode, #hurd, 2013-06-13
+
+ <bddebian> Heya Richard. Solve the worlds problems yet? :)
+ <kilobug> bddebian: I fear the worlds problems are NP-complete ;)
+ <bddebian> heh
+ <braunr> bddebian: i wish i could solve mine at least :p
+ <bddebian> braunr: I meant the contention thing you were discussing the
+ other day :)
+ <braunr> bddebian: oh
+ <braunr> i have a solution that improves the behaviour yes, but there is
+ still contention the first time a thread performs an ipc
+ <bddebian> Any thread or the first time there is contention?
+ <braunr> there may be contention the first time a thread sends a message to
+ a server
+ <braunr> (assuming a server uses a single port set to receive requests)
+ <bddebian> Oh aye
+ <braunr> i think it's as much as can be done considering there is a
+ translation from capability to thread
+ <braunr> other schemes are just too heavy, and thus don't scale well
+ <braunr> this translation is one of the two important nice properties of
+ microkernel based systems, and translations (or indirections) usually have
+ a cost
+ <braunr> so we want to keep them
+ <braunr> and we have to accept that cost
+ <braunr> the amount of code in the critical section should be so small it
+ should only matter for machines with several hundreds or thousands
+ processors
+ <braunr> so it's not such a bit problem
+ <bddebian> OK
+ <braunr> but it would have been nice to have an additional valid
+ theoretical argument to explain how ipc isn't that slow compared to
+ system calls
+ <braunr> s/bit/big/
+ <braunr> people keep saying l4 made ipc as fast as system calls without
+ taking that stuff into account
+ <braunr> which makes the community look lame in the eyes of those familiar
+ with it
+ <bddebian> heh
+ <braunr> with my solution, persistent applications like databases should
+ perform as fast as on an l4 like kernel
+ <braunr> but things like parallel builds, which start many different
+ processes for each file, will suffer a bit more from contention
+ <braunr> seems like a fair compromise to me
+ <bddebian> Aye
+ <braunr> as mcsim said, there is a lot of contention about everywhere in
+ almost every application
+ <braunr> and lockless stuff is hard to correctly implement
+ <braunr> so it should be all right :)
+ <braunr> ... :)
+ <mcsim> braunr: What if we have at least 1 thread for each core that stay
+ in per-core queue. When we decide to kill a thread and this thread is
+ last in a queue we replace it with load balancer. This is still worse
+ than with a monolithic kernel, but it is simpler to implement from the kernel
+ perspective.
+ <braunr> mcsim: it doesn't scale well
+ <braunr> you end up with one thread per cpu per port set
+ <mcsim> load balancer is only one thread
+ <mcsim> why would it end up like you said?
+ <braunr> remember the goal is to avoid contention
+ <braunr> your proposition is to set per cpu queues
+ <braunr> the way i understand what you said, it means clients will look up
+ a server thread in these queues
+ <braunr> one of them actually, the one for the cpu they're currently
+ running on
+ <braunr> so 1/ it disables migration
+ <braunr> or 2/ you have one server thread per client per cpu
+ <braunr> i don't see what a "load balancer" would do here
+ <mcsim> client either finds server thread without contention or it sends
+ message to load balancer, that redirects message to thread from global
+ queue. Where global queue is concatenation of local ones.
+ <braunr> you can't concatenate local queues in a global one
+ <braunr> if you do that, you end up with a global queue, and a global lock
+ again
+ <mcsim> not global
+ <mcsim> load balancer is just one
+ <braunr> then you serialize all remote messaging through a single thread
+ <mcsim> so contention will be only among local thread and load balancer
+ <braunr> i don't see how it doesn't make the load balancer global
+ <mcsim> it makes
+ <mcsim> but it just makes bootstraping harder
+ <braunr> i'm not following
+ <braunr> and i don't see how it improves on my solution
+ <mcsim> in your example with make -j64 very soon there will be local
+ threads at any core
+ <braunr> yes, hence the lack of scalability
+ <mcsim> but that's your goal: create as many server thread as many clients
+ you have, isn't it?
+ <braunr> your solution may create a lot more
+ <braunr> again, one per port set (or server) per cpu
+ <braunr> imagine this worst case: you have a single client with one thread
+ <braunr> which gets migrated to every cpu on the machine
+ <braunr> it will spawn one thread per cpu at the server side
+ <mcsim> why would it migrate all the time?
+ <braunr> it's a worst case
+ <braunr> if it can migrate, consider it will
+ <braunr> murphy's law, you know
+ <braunr> also keep in mind contention doesn't always occur with a global
+ lock
+ <braunr> i'm talking about potential contention
+ <braunr> and same things apply: if it can happen, consider it will
+ <mcsim> than we can make load balancer that also migrates server threads
+ <braunr> ok so in addition to worker threads, we'll add an additional per
+ server load balancer which may have to lock several queues at once
+ <braunr> doesn't it feel completely overkill to you ?
+ <mcsim> load balancer is global, not per-cpu
+ <mcsim> there could be contention for it
+ <braunr> again, keep in mind this problem becomes important for several
+ hundreds processors, not below
+ <braunr> yes but it has to balance
+ <braunr> which means it has to lock cpu queues
+ <braunr> and at least two of them to "migrate" server threads
+ <braunr> and i don't know why it would do that
+ <braunr> i don't see the point of the load balancer
+ <mcsim> so, you start make -j64. First 64 invocations of gcc will suffer
+ from contention for load balancer, but later on it will create enough
+ server threads and contention will disappear
+ <braunr> no
+ <braunr> that's the best case : there is always one server thread per cpu
+ queue
+ <braunr> how do you guarantee your 64 server threads don't end up in the
+ same cpu queue ?
+ <braunr> (without disabling migration)
+ <mcsim> load balancer will try to put some server thread to the core where
+ load balancer was invoked
+ <braunr> so there is no guarantee
+ <mcsim> LB can pin server thread
+ <braunr> unless we invoke it regularly, in a way similar to what is already
+ done in the SMP scheduler :/
+ <braunr> and this also means one balancer per cpu then
+ <mcsim> why one balancer per cpu?
+ <braunr> 15:56 < mcsim> load balancer will try to put some server thread to
+ the core where load balancer was invoked
+ <braunr> why only where it was invoked ?
+ <mcsim> because it assumes that if some one asked for server at core x, it
+ most likely will ask for the same service from the same core
+ <braunr> i'm not following
+ <mcsim> LB just tries to prefetch where the next call will be
+ <braunr> what you're describing really looks like per-cpu work queues ...
+ <braunr> i don't see how you make sure there aren't too many threads
+ <braunr> i don't see how a load balancer helps
+ <braunr> this is just a heuristic
+ <mcsim> when server thread is created?
+ <mcsim> who creates it?
+ <braunr> and it may be useless, depending on how threads are migrated and
+ when they call the server
+ <braunr> same answer as yesterday
+ <braunr> there must be at least one thread receiving messages on a port set
+ <braunr> when a message arrives, if there aren't any spare threads, it
+ spawns one to receive messages while it processes the request
+ <mcsim> at the moment server threads are killed by timeout, right?
+ <braunr> yes
+ <braunr> well no
+ <braunr> there is a debian patch that disables that
+ <braunr> because there is something wrong with thread destruction
+ <braunr> but that's an implementation bug, not a design issue
+ <mcsim> so it is the mechanism by which we ensure that there aren't too many
+ threads
+ <mcsim> it helps because yesterday I proposed a hierarchical scheme, where
+ one server thread could wait in cpu queues of several cores
+ <mcsim> but this has to be implemented in the kernel
+ <braunr> a hierarchical scheme would help yes
+ <braunr> a bit
+ <mcsim> i propose scheme that could be implemented in userspace
+ <braunr> ?
+ <mcsim> kernel should not distinguish among load balancer and server thread
+ <braunr> sorry this is too confusing
+ <braunr> please start describing what you have in mind from the start
+ <mcsim> ok
+ <mcsim> so my starting point was to use hierarchical management
+ <mcsim> but the drawback was that to implement it you have to do this in
+ kernel
+ <mcsim> right?
+ <braunr> no
+ <mcsim> so I thought how can this be implemented in user space
+ <braunr> being in kernel isn't the problem
+ <braunr> contention is
+ <braunr> on the contrary, i want ipc in kernel exactly because that's where
+ you have the most control over how it happens
+ <braunr> and can provide the best performance
+ <braunr> ipc is the main kernel responsibility
+ <mcsim> but if you have few clients you have low contention
+ <braunr> the goal was "0 potential contention"
+ <mcsim> and if you have many clients, you have many servers
+ <braunr> let's say server threads
+ <braunr> for me, a server is a server task or process
+ <mcsim> right
+ <braunr> so i think 0 potential contention is just impossible
+ <braunr> or it requires too many resources that make the solution not
+ scalable
+ <mcsim> 0 contention is impossible, since you have an imbalance between the
+ numbers of client threads and server threads
+ <braunr> well no
+ <braunr> it *can* be achieved
+ <braunr> imagine servers register themselves to the kernel
+ <braunr> and the kernel signals them when a client thread is spawned
+ <braunr> you'd effectively have one server thread per client
+ <braunr> (there would be other problems like e.g. when a server thread
+ becomes the client of another, etc..)
+ <braunr> so it's actually possible
+ <braunr> but we clearly don't want that, unless perhaps for real time
+ threads
+ <braunr> but please continue
+ <mcsim> what does "and the kernel signals them when a client thread is
+ spawned" mean?
+ <braunr> it means each time a thread not part of a server thread is
+ created, servers receive a signal meaning "hey, there's a new thread out
+ there, you might want to preallocate a server thread for it"
+ <mcsim> and what is the difference with creating thread on demand?
+ <braunr> on demand can occur when receiving a message
+ <braunr> i.e. during syscall
+ <mcsim> I will continue, I just want to be sure that I'm not basing this on
+ wrong assumptions.
+ <mcsim> and what is bad in that?
+ <braunr> (just to clarify, i use the word "syscall" with the same meaning
+ as "RPC" on a microkernel system, whereas it's a true syscall on a
+ monolithic one)
+ <braunr> contention
+ <braunr> whether you have contention on a list of threads or on map entries
+ when allocating a stack doesn't matter
+ <braunr> the problem is contention
+ <mcsim> and if we create server thread always?
+ <mcsim> and do not keep them in queue?
+ <braunr> always ?
+ <mcsim> yes
+ <braunr> again
+ <braunr> you'd have to allocate a stack for it
+ <braunr> every time
+ <braunr> so two potentially heavy syscalls to allocate/free the stack
+ <braunr> not to mention the thread itself, its associations with its task,
+ ipc space, maintaining reference counts
+ <braunr> (moar contention)
+ <braunr> creating threads was considered cheap at the time the process was
+ the main unit of concurrency
+ <mcsim> ok, than we will have the same contention if we will create a
+ thread when "the kernel signals them when a client thread is spawned"
+ <braunr> now we have work queues / thread pools just to avoid that
+ <braunr> no
+ <braunr> because that contention happens at thread creation
+ <braunr> not during a syscall
+ <braunr> i'll redefine the problem: the problem is contention during a
+ system call / IPC
+ <mcsim> ok
+ <braunr> note that my current solution is very close to signalling every
+ server
+ <braunr> it's the lazy version
+ <braunr> match at first IPC time
+ <mcsim> so I was basing my plan on the case when we create new thread when
+ client makes syscall and there is not enough server threads
+ <braunr> the problem exists even when there is enough server threads
+ <braunr> we shouldn't consider the case where there aren't enough server
+ threads
+ <braunr> real time tasks are the only ones which want that, and can
+ preallocate resources explicitly
+ <mcsim> I think that real time tasks should be really separated
+ <mcsim> For them resource availability is much more important than good
+ resource utilisation.
+ <mcsim> So if we talk about real time tasks we should apply one policy and
+ for non-real time tasks another
+ <mcsim> So it shouldn't be critical if thread is created during syscall
+ <braunr> agreed
+ <braunr> that's what i was saying :
+ <braunr> :)
+ <braunr> 16:23 < braunr> we shouldn't consider the case where there aren't
+ enough server threads
+ <braunr> in this case, we spawn a thread, and that's ok
+ <braunr> it will live on long enough that we really don't care about the
+ cost of lazily creating it
+ <braunr> so let's concentrate only on the case where there already are
+ enough server threads
+ <mcsim> So if client makes a request to ST (is it ok to use abbreviations?)
+ there are several cases:
+ <mcsim> 1/ There is ST waiting on local queue (trivial case)
+ <mcsim> 2/ There is no ST, only load balancer (LB). LB decides to create a
+ new thread
+ <mcsim> 3/ Like in previous case, but LB decides to perform migration
+ <braunr> migration of what ?
+ <mcsim> migration of ST from other core
+ <braunr> the only case effectively solving the problem is 1
+ <braunr> others introduce contention, and worse, complex code
+ <braunr> i mean a complex solution
+ <braunr> not only code
+ <braunr> even the addition of a load balancer per port set
+ <braunr> the data structures involved for proper migration
+ <mcsim> But 2 and 3 in long run will lead to having enough threads on all
+ cores
+ <braunr> then you end up having 1 per client per cpu
+ <mcsim> migration is needed in any case
+ <braunr> no
+ <braunr> why would it be ?
+ <mcsim> to balance load
+ <mcsim> not only for this case
+ <braunr> there already is load balancing in the scheduler
+ <braunr> we don't want to duplicate its function
+ <mcsim> what kind of load balancing?
+ <mcsim> *has scheduler
+ <braunr> thread weight / cpu
+ <mcsim> and does it perform migration?
+ <braunr> sure
+ <mcsim> so scheduler can be simplified if policy "when to migrate" will be
+ moved to user space
+ <braunr> this is becoming a completely different problem
+ <braunr> and i don't want to do that
+ <braunr> it's very complicated for no real world benefit
+ <mcsim> but all this will be done in userspace
+ <braunr> ?
+ <braunr> all what ?
+ <mcsim> migration decisions
+ <braunr> in your scheme you mean ?
+ <mcsim> yes
+ <braunr> explain how
+ <mcsim> LB will decide when thread will migrate
+ <mcsim> and LB is user space task
+ <braunr> what does it bring ?
+ <braunr> imagine that, in the mean time, the scheduler then decides the
+ client should migrate to another processor for fairness
+ <braunr> you'd have migrated a server thread once for no actual benefit
+ <braunr> or again, you need to disable migration for long durations, which
+ sucks
+ <braunr> also
+ <braunr> 17:06 < mcsim> But 2 and 3 in long run will lead to having enough
+ threads on all cores
+ <braunr> contradicts the need for a load balancer
+ <braunr> if you have enough threads every where, why do you need to balance
+ ?
+ <mcsim> and how are you going to deal with the case when client will
+ migrate all the time?
+ <braunr> i intend to implement something close to thread migration
+ <mcsim> because some of them can die because of timeout
+ <braunr> something l4 already does iirc
+ <braunr> the thread scheduler manages scheduling contexts
+ <braunr> which can be shared by different threads
+ <braunr> which means the server thread bound to its client will share the
+ scheduling context
+ <braunr> the only thing that gets migrated is the scheduling context
+ <braunr> the same way a thread can be migrated indifferently on a
+ monolithic system, whether it's in user of kernel space (with kernel
+ preemption enabled ofc)
+ <braunr> or*
+ <mcsim> but how server thread can process requests from different clients?
+ <braunr> mcsim: load becomes a problem when there are too many threads, not
+ when they're dying
+ <braunr> they can't
+ <braunr> at first message, they're *bound*
+ <braunr> => one server thread per client
+ <braunr> when the client dies, the server thread is ubound and can be
+ recycled
+ <braunr> unbound*
+ <mcsim> and you intend to put recycled threads to global queue, right?
+ <braunr> yes
+ <mcsim> and I propose to put them in local queues in hope that next client
+ will be on the same core
+ <braunr> the thing is, i don't see the benefit
+ <braunr> next client could be on another
+ <braunr> in which case it gets a lot heavier than the extremely small
+ critical section i have in mind
+ <mcsim> but most likely it could be on the same
+ <braunr> uh, no
+ <mcsim> because the load on this core is decreased
+ <braunr> well, ok, it would likely remain on the same cpu
+ <braunr> but what happens when it migrates ?
+ <braunr> and what about memory usage ?
+ <braunr> one queue per cpu per port set can get very large
+ <braunr> (i understand the proposition better though, i think)
+ <mcsim> we can ask also "What if random access in memory will be more usual
+ than sequential?", but we still optimise sequential one, making random
+ sometimes even worse. The real question is "How can we maximise benefit
+ of knowledge where free server thread resides?"
+ <mcsim> previous was reply to: "(17:17:08) braunr: but what happens when it
+ migrates ?"
+ <braunr> i understand
+ <braunr> you optimize for the common case
+ <braunr> where a lot more ipc occurs than migrations
+ <braunr> agreed
+ <braunr> now, what happens when the server thread isn't in the local queue
+ ?
+ <mcsim> than client request will be handled to LB
+ <braunr> why not search directly itself ?
+ <braunr> (and btw, the right word is "then")
+ <mcsim> LB can decide whom to migrate
+ <mcsim> right, sorry
+ <braunr> i thought you were improving on my scheme
+ <braunr> which implies there is a 1:1 mapping for client and server threads
+ <mcsim> If job of LB is too small than it can be removed and everything
+ will be done in kernel
+ <braunr> it can't be done in userspace anyway
+ <braunr> these queues are in the port / port set structures
+ <braunr> it could be done though
+ <braunr> i mean
+ <braunr> using per cpu queues
+ <braunr> server threads could be both in per cpu queues and in a global
+ queue as long as they exist
+ <mcsim> there should be no global queue, because there again will be
+ contention for it
+ <braunr> mcsim: accessing a load balancer implies contention
+ <braunr> there is contention anyway
+ <braunr> what you're trying to do is reduce it in the first message case if
+ i'm right
+ <mcsim> braunr: yes
+ <braunr> well then we have to revise a few assumptions
+ <braunr> 17:26 < braunr> you optimize for the common case
+ <braunr> 17:26 < braunr> where a lot more ipc occurs than migrations
+ <braunr> that actually becomes wrong
+ <braunr> the first message case occurs for newly created threads
+ <mcsim> for make -j64 this is actually common case
+ <braunr> and those are usually not spawned on the processor their parent runs
+ on
+ <braunr> yes
+ <braunr> if you need all processors, yes
+ <braunr> i don't think taking into account this property changes many
+ things
+ <braunr> per cpu queues still remain the best way to avoid contention
+ <braunr> my problem with this solution is that you may end up with one
+ unbound thread per processor per server
+ <braunr> also, i say "per server", but it's actually per port set
+ <braunr> and even per port depending on how a server is written
+ <braunr> (the system will use one port set for one server in the common
+ case but still)
+ <braunr> so i'll start with a global queue for unbound threads
+ <braunr> and the day we decide it should be optimized with local (or
+ hierarchical) queues, we can still do it without changing the interface
+ <braunr> or by simply adding an option at port / port set creation
+ <braunr> which is a non-intrusive change
+ <mcsim> ok. your solution should be simpler. And TBH, what I propose is
+ not clearly much more beneficial.
+ <braunr> well it is actually for big systems
+ <braunr> it is because instead of grabbing a lock, you disable preemption
+ <braunr> which means writing to a local, uncontended variable
+ <braunr> with 0 risk of cache line bouncing
+ <braunr> this actually looks very good to me now
+ <braunr> using an option to control this behaviour
+ <braunr> and yes, in the end, it gets very similar to the slab allocator,
+ where you can disable the cpu pool layer with a flag :)
+ <braunr> (except the serialized case would be the default one here)
+ <braunr> mcsim: thanks for insisting
+ <braunr> or being persistent
+ <mcsim> braunr: thanks for conversation :)
+ <mcsim> and probably I should have started from the statement that I wanted
+ to improve the common case
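+
+The conclusion above is to start with a single, serialized global queue of
+unbound server threads, and to optionally add per-CPU pools later, accessed by
+disabling preemption instead of taking a lock, much like the CPU layer of the
+slab allocator. The following C sketch illustrates that lookup path; all names
+in it are hypothetical placeholders, not actual x15 interfaces.
+
+    #include <stddef.h>
+
+    /* Hypothetical sketch of picking an unbound server thread: per-CPU fast
+     * path without a lock, global slow path with one short critical section. */
+
+    #define NR_CPUS 64
+
+    struct thread;
+    struct thread_queue { struct thread *head; };
+    struct lock { int locked; };
+
+    void preempt_disable(void);
+    void preempt_enable(void);
+    unsigned int cpu_id(void);
+    void lock_acquire(struct lock *l);
+    void lock_release(struct lock *l);
+    struct thread *queue_pop(struct thread_queue *q);
+
+    struct port_set {
+        struct thread_queue cpu_pool[NR_CPUS]; /* optional, contention-free */
+        struct lock lock;                      /* default, serialized path  */
+        struct thread_queue global;
+    };
+
+    struct thread *unbound_thread_get(struct port_set *ps)
+    {
+        struct thread *t;
+
+        /* Fast path: the local pool, accessed with preemption disabled,
+         * i.e. a local, uncontended variable, no cache line bouncing. */
+        preempt_disable();
+        t = queue_pop(&ps->cpu_pool[cpu_id()]);
+        preempt_enable();
+
+        if (t != NULL)
+            return t;
+
+        /* Slow path: the global queue under one short critical section. */
+        lock_acquire(&ps->lock);
+        t = queue_pop(&ps->global);
+        lock_release(&ps->lock);
+        return t;
+    }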
+
+
+## IRC, freenode, #hurd, 2013-06-20
+
+ <congzhang> braunr: how about your x15, is it an improvement of mach or a
+ redesign? I really want to know that:)
+ <braunr> it's both largely based on mach and now quite far from it
+ <braunr> based on mach from a functional point of view
+ <braunr> i.e. the kernel assumes practically the same functions, with a
+ close interface
+ <congzhang> Good point:)
+ <braunr> except for ipc which is entirely rewritten
+ <braunr> why ? :)
+ <congzhang> for from a functional point of view:) I think each design has
+ its intrinsic advantages and disadvantages
+ <braunr> but why is it good ?
+ <congzhang> if it's a redesign, I may need to wait more time for a new functional hurd
+ <braunr> you'll have to wait a long time anyway :p
+ <congzhang> Improvement was better sometimes, although redesign was more
+ attractive sometimes :)
+ <congzhang> I will wait :)
+ <braunr> i wouldn't put that as a reason for it being good
+ <braunr> this is a departure from what current microkernel projects are
+ doing
+ <braunr> i.e. x15 is a hybrid
+ <congzhang> Sure, it is good from design too:)
+ <braunr> yes but i don't see why you say that
+ <congzhang> Sorry, i did not show my view clear, it is good from design
+ too:)
+ <braunr> you're just saying it's good, you're not saying why you think it's
+ good
+ <congzhang> I would like to talk about hybrids, I want to talk about that, but I am a
+ little afraid that you are all enthusiastic microkernel fans
+ <braunr> well no i'm not
+ <braunr> on the contrary, i'm personally opposed to the so called
+ "microkernel dogma"
+ <braunr> but i can give you reasons why, i'd like you to explain why *you*
+ think a hybrid design is better
+ <congzhang> so, when I talk apple or nextstep, I got one soap :)
+ <braunr> that's different
+ <braunr> these are still monolithic kernels
+ <braunr> well, monolithic systems running on a microkernel
+ <congzhang> yes, I view this as one type of hybrid
+ <braunr> no it's not
+ <congzhang> microkernel wants to divide process (task) from design view,
+ it is great
+ <congzhang> as implementation view or execution view, we have one cpu and some
+ physical memory, as the simplest condition, we can't change that
+ <congzhang> that's what resources the system has
+ <braunr> what's your point ?
+ <congzhang> I view this as follow
+ <congzhang> I am cpu and computer
+ <congzhang> application are the things I need to do
+ <congzhang> for running the program and finish the job, which way is the
+ best way for me
+ <congzhang> I need keep all the thing as simple as possible, divide just
+ from application design view, for me no different
+ <congzhang> design was microkernel, run just for one cpu and these
+ resources.
+ <braunr> (well there can be many processors actually)
+ <congzhang> I know, I mean hybrid at some level, we can't escape that
+ <congzhang> braunr: I show my point?
+ <braunr> well l4 systems showed we somehow can
+ <braunr> no you didn't
+ <congzhang> x15's api was rpc, right?
+ <braunr> yes
+ <braunr> well a few system calls, and mostly rpcs on top of the ipc one
+ <braunr> just as with mach
+ <congzhang> and you hope the target logic runs locally just like an in-process
+ function call, right?
+ <braunr> no
+ <braunr> it can't run locally
+ <congzhang> you need thread context switch
+ <braunr> and address space context switch
+ <congzhang> but you cut down the cost
+ <braunr> how so ?
+ <congzhang> I mean you do it, right?
+ <congzhang> x15
+ <braunr> yes but no in this way
+ <braunr> in every other way :p
+ <congzhang> I know, you remember performance anywhere :p
+ <braunr> i still don't see your point
+ <braunr> i'd like you to tell, in one sentence, why you think hybrids are
+ better
+ <congzhang> balance the design and implement problem :p
+ <braunr> which is ?
+ <congzhang> hybrid for kernel arch
+ <braunr> you're stating the solution inside the problem
+ <congzhang> you are good at mathematics
+ <congzhang> sorry, I am not native english speaker
+ <congzhang> braunr: I will find some more suitable sentence to show my
+ point some day, but I can't find one if you think I did not show my
+ point:)
+ <congzhang> for today
+ <braunr> too bad
+ <congzhang> If i am computer I hope the arch was monolithic, If i am
+ programmer I hope the arch was microkernel, that's my idea
+ <braunr> ok let's get a bit faster
+ <braunr> monolithic for performance ?
+ <congzhang> braunr: sorry for that, and thank you for the talk:)
+ <braunr> (a computer doesn't "hope")
+ <congzhang> braunr: you need very clear answer, I can't give you that,
+ sorry again
+ <braunr> why do you say "If i am computer I hope the arch was monolithic" ?
+ <congzhang> I know you can solve any single problem
+ <braunr> no i don't, and it's not about me
+ <braunr> i'm just curious
+ <congzhang> I do the work for myself, as my own view, all the resource
+ belongs to me, I do not think too much arch-related division is needed, if
+ I am the computer :P
+ <braunr> separating address spaces helps avoiding serious errors like
+ corrupting memory of unrelated subsystems
+ <braunr> how does one not want that ?
+ <braunr> (except for performance)
+ <congzhang> braunr: I am computer when I say that words!
+ <braunr> a computer doesn't want anything
+ <braunr> users (including developers) on the other way are the point of
+ view you should have
+ <congzhang> I am engineer other time
+ <congzhang> we create computer, but they are lifeable just my feeling, hope
+ not talk this topic
+ <braunr> what ?
+ <congzhang> I mark computer as life things
+ <braunr> please don't
+ <braunr> and even, i'll make a simple example in favor of isolating
+ resources
+ <braunr> if we, humans, were able to control all of our "resources", we
+ could for example shut down our heart by mistake
+ <congzhang> back to the topic, I think monolithic was easy to understand,
+ and cut the combinatorial problem count for the perfect software
+ <braunr> the reason the body has so many involuntary functions is probably
+ because those who survived did so because these functions were
+ involuntary and controlled by separated physiological functions
+ <braunr> now that i've made this absurd point, let's just not consider
+ computers as life forms
+ <braunr> microkernels don't make a system that more complicated
+ <congzhang> they does
+ <braunr> no
+ <congzhang> do
+ <braunr> they create isolation
+ <braunr> and another layer of indirection with capabilities
+ <braunr> that's it
+ <braunr> it's not that more complicated
+ <congzhang> view the kernel function from more nature view, execute some
+ code
+ <braunr> what ?
+ <congzhang> I know the benefit of the microkernel and the os
+ <congzhang> it's complicated
+ <braunr> not that much
+ <congzhang> I agree with you
+ <congzhang> microkernel was the idea of organization
+ <braunr> yes
+ <braunr> but always keep in mind your goal when thinking about means to
+ achieve them
+ <congzhang> we do the work from a different view
+ <kilobug> what's quite complicated is making a microkernel design without
+ too much performances loss, but aside from that performances issue, it's
+ not really much more complicated
+ <congzhang> hurd do the work at os level
+ <kilobug> even a monolithic kernel is made of several subsystems that
+ communicate with each other using an API
+ <core-ix> i'm reading this conversation for some time now
+ <core-ix> and I have to agree with braunr
+ <core-ix> microkernels simplify the design
+ <braunr> yes and no
+ <braunr> i think it depends a lot on the availability of capabilities
+ <core-ix> i have experience mostly with QNX and i can say it is far
+ easier to write a driver for QNX, compared to Linux/BSD for example ...
+ <braunr> which are the major feature microkernels usually add
+ <braunr> qnx >= 5 do provide capabilities
+ <braunr> (in the form of channels)
+ <core-ix> yeah ... it's the basic communication mechanism
+ <braunr> but my initial and still unanswered question was: why do people
+ think a hybrid kernel is batter than a true microkernel, or not
+ <braunr> better*
+ <congzhang> I do not say what is good or not, I just say hybrid is
+ acceptable
+ <braunr> core-ix: and if i'm right, they're directly implemented by the
+ kernel, and not a userspace system server
+ <core-ix> braunr: evolution is more easily accepted than revolution :)
+ <core-ix> braunr: yes, message passing is in the QNX kernel
+ <braunr> not message passing, capabilities
+ <braunr> l4 does message passing in kernel too, but you need to go through
+ a capability server
+ <braunr> (for the l4 variants i have in mind at least)
+ <congzhang> the operating system evolves for its applications.
+ <braunr> congzhang: about evolution, that's one explanation, but other than
+ that ?
+ <braunr> core-ix: ^
+ <core-ix> braunr: by capability you mean (for the lack of a better word
+ i'll use) access control mechanisms?
+ <braunr> i mean reference-rights
+ <core-ix> the "trusted" functionality available in other OS?
+ <braunr> http://en.wikipedia.org/wiki/Capability-based_security
+ <braunr> i don't know what other systems refer to with "trusted"
+ functionality
+ <core-ix> yeah, the same thing
+ <congzhang> for now, I am searching one way to make hurd arm edition
+ suitable for Raspberry Pi
+ <congzhang> I hope the design or the arch itself can be scaled
+ <core-ix> braunr: i think (!!!) that those are implemented in the Secure
+ Kernel (http://www.qnx.com/products/neutrino-rtos/secure-kernel.html)
+ <core-ix> never used it though ...
+ <congzhang> rpc makes interception easy :)
+ <braunr> core-ix: regular channels are capabilities
+ <core-ix> yes, and by extensions - they are in the kenrel
+ <braunr> that's my understanding too
+ <braunr> and that one thing that, for me, makes qnx an hybrid as well
+ <congzhang> just need to intercept in the kernel,
+ <core-ix> braunr: i would dive the academic aspects of this ... in my mind
+ a microkernel is system that provides minimal hardware abstraction,
+ communication primitives (usually message passing), virtual memory
+ protection
+ <core-ix> *wouldn't ...
+ <braunr> i think it's very important on the contrary
+ <braunr> what you describe is the "microkernel dogma"
+ <braunr> precisely
+ <braunr> that doesn't include capabilities
+ <braunr> that's why l4 messaging is thread-based
+ <braunr> and that's why l4 based systems are so slow
+ <braunr> (except okl4 which put back capabilities in the kernel)
+ <core-ix> so the compromise here is to include capabilities implementation
+ in the kernel, thus making the final product hybrid?
+ <braunr> not only
+ <braunr> because now that you have them in kernel
+ <braunr> the kernel probably has to manage memory for itself
+ <braunr> so you need more features in the virtual memory system
+ <core-ix> true ...
+ <braunr> that's what makes it a hybrid
+ <braunr> other ways being making each client provide memory, but that's
+ when your system becomes very complicated
+ <core-ix> but I believe this is true for pretty much any "general OS" case
+ <braunr> and some resources just can't be provided by a client
+ <braunr> e.g. a client can't provide virtual memory to another process
+ <braunr> okl4 is actually the only pragmatic real-world implementation of
+ l4
+ <braunr> and they also added unix-like signals
+ <braunr> so that's an interesting model
+ <braunr> as well as qnx
+ <braunr> the good thing about the hurd is that, although it's not kernel
+ agnostic, it doesn't require a lot from the underlying kernel
+ <core-ix> about hurd?
+ <braunr> yes
+ <core-ix> i really need to dig into this code at some point :)
+ <braunr> well you may but you may not see that property from the code
+ itself
+
+
+## IRC, freenode, #hurd, 2013-06-28
+
+ <teythoon> so tell me about x15 if you are in the mood to talk about that
+ <braunr> what do you want to know ?
+ <teythoon> well, the high level stuff first
+ <teythoon> like what's the big picture
+ <braunr> the big picture is that x15 is intended to be a "better mach for
+ the hurd"
+ <braunr> mach is too general purpose
+ <braunr> its ipc mechanism too powerful
+ <braunr> too complicated, error prone, and slow
+ <braunr> so i intend to build something a lot simpler and faster :p
+ <teythoon> so your big picture includes actually porting hurd? i thought i
+ read somewhere that you have a rewrite in mind
+ <braunr> it's a clone, yes
+ <braunr> x15 will feature mostly sync ipc, and no high level types inside
+ messages
+ <braunr> the ipc system call will look like what qnx does
+ <braunr> send-recv from the client, recv/reply/reply-recv from the server
+ <teythoon> but doesn't sync mean that your context switch will have to be
+ quite fast?
+ <braunr> how does that differ from the async approach ?
+ <braunr> (keep in mind that almost all hurd RPCs are synchronous)
+ <teythoon> yes, I know, and it also affects async mode, but a slow switch
+ is worse for the sync case, isn't it?
+ <teythoon> ok so your ipc will be more agnostic wrt to what it transports?
+ unlike mig I presume?
+ <braunr> no it's the same
+ <braunr> yes
+ <braunr> input will be an array, each entry denoting either memory or port
+ rights
+ <braunr> (or directly one entry for fast ipcs)
+ <teythoon> memory as in pointers?
+ <braunr> (well fast ipc when there is only one entry to avoid hitting a
+ table)
+ <braunr> pointer/size yes
+ <teythoon> hm, surely you want a way to avoid copying that, right?
+ <braunr> the only operation will be copy (i.e. unlike mach which allows
+ sharing)
+ <braunr> why ?
+ <braunr> copy doesn't exclude zero copy
+ <braunr> (zero copy being adjusting page tables with copy on write
+ techniques)
+ <teythoon> right
+ <teythoon> but isn't that too coarse, like in cow a whole page?
+ <braunr> depends on the message size
+ <braunr> or options provided by the caller, i don't know yet
+ <teythoon> oh, you are going to pack the memory anyway?
+ <braunr> depends on the caller
+ <braunr> i'm not yet sure about these details
+ <braunr> ideally, i'd like to avoid serialization altogether
+ <teythoon> wouldn't that be like cheating b/c it's the first copy?
+ <braunr> directly pass pointers/sizes from the sender address space, and
+ either really copy or use zero copy
+ <teythoon> right, but then you're back at the page size issue
+ <braunr> yes
+ <braunr> it's not a real issue
+ <braunr> the kernel must support both ways
+ <braunr> the minor issue is determining which way to choose
+ <braunr> it's not a critical issue
+ <braunr> my current plan is to always copy, unless the caller has
+ explicitly set a flag and is passing properly aligned buffers
+ <teythoon> u sure? I mean the caller is free to arrange the stuff he intends
+ to send any way he likes, how are you going to cow that then?
+ <teythoon> ok
+ <teythoon> right
+ <braunr> properly aligned buffers :)
+ <braunr> otherwise the kernel rejects the request
+ <teythoon> that's reasonable, yes
+ <braunr> in addition to being synchronous, ipc will also take a special
+ path in the scheduler to directly use the client scheduling context
+ <braunr> avoiding the sleep/wakeup overhead, and providing priority
+ inheritance by side effect
+ <teythoon> uh, but wouldn't dropping serialization create security and
+ reliability issues? if the receiver isn't doing a proper job sanitizing
+ its stuff
+ <braunr> why would the client not sanitize ?
+ <braunr> err
+ <braunr> server
+ <braunr> it has to anyway
+ <teythoon> sure, but a proper parser written once might be more robust,
+ even if it adds overhead
+ <teythoon> the serialization i mean
+ <braunr> it's just a layer
+ <braunr> even with high level types, you still need to sanitize
+ <braunr> the real downside is losing cross-architecture portability
+ <braunr> making the potential implementation of a single system image a lot
+ more restricted or difficult
+ <braunr> but i don't care about that much
+ <braunr> mach was built with this in mind though
+ <teythoon> it's a nice idea, but i don't believe anyone does ssi anymore
+ <braunr> i don't know
+ <teythoon> and certainly not across architectures
+ <braunr> there are few projects
+ <braunr> anyway it's irrelevant currently
+ <braunr> and my interface just restricts it, it doesn't prevent it
+ <braunr> so i consider it an acceptable compromise
+ <teythoon> so, does it run? what does it do?
+ <teythoon> it certainly is, yes
+ <braunr> for now, it manages memory (physical, virtual, kernel, and soon,
+ anonymous)
+ <braunr> supports multiple processors with the required posix scheduling
+ policies
+ <braunr> (it uses a cute proportionally fair time sharing algorithm)
+ <braunr> there are locks (spin locks, mutexes, condition variables) and
+ lockless stuff (à la rcu)
+ <braunr> both x86 and x86_64 are supported
+ <braunr> (even pae)
+ <braunr> work queues
+ <teythoon> sounds impressive :)
+ <braunr> :)
+ <braunr> i also added basic debugging
+ <braunr> stack trace (including getting the symbol table) handling
+ <braunr> so yes, it's much much better than what i previously did
+ <braunr> and on the right track
+ <braunr> it already scales a lot better than mach for what it does
+ <braunr> there are generic data structures (linked list, red-black tree,
+ radix tree)
+ <braunr> the radix tree supports lockless lookups, so looking up both the
+ page cache and the ipc spaces is lockless
+ <teythoon> that's nice :)
+ <braunr> there are a few things using global locks, but there are TODOs
+ about them
+ <braunr> even with that, it should be scalable enough for a start
+ <braunr> and improving those parts shouldn't be too difficult
+
+
+## IRC, freenode, #hurd, 2013-07-10
+
+ <nlightnfotis> braunr: From what I have understood you aim for x15 to be a
+ production ready μ-kernel for usage in the Hurd? Or is it unrelated to
+ the Hurd?
+ <braunr> nlightnfotis: it's for a hurd clone
+ <nlightnfotis> braunr: I see. Is it close to any of the existing
+ microkernels as far as its design is concerned (L4, Viengoos) or is it
+ new research?
+ <braunr> it's close to mach
+ <braunr> and qnx
+
+
+## IRC, freenode, #hurd, 2013-07-29
+
+ <braunr> making progress on x15 pmap module
+ <braunr> factoring code for mapping creation/removal on current/kernel and
+ remote processes
+ <braunr> also started "swap emulation" by reserving some physical memory to
+ act as swap backing store
+ <braunr> which will allow creating memory pressure very early in the
+ development process
+
+
+## IRC, freenode, #hurd, 2013-08-23
+
+ < nlightnfotis> braunr: something a little bit irrelevant: how many things
+ are missing from mach to be considered a solid base for the Hurd? Is it
+ only SMP and x86_64 support?
+ < braunr> define "solid base for the hurd"
+ < nlightnfotis> solid enough to not look for a replacement for it
+ < braunr> then i'd say, from my very personal point of view, that you want
+ x15
+ < nlightnfotis> I didn't understand this. Are you planning for x15 to be a
+ better mach?
+ < braunr> with a different interface, so not compatible
+ < braunr> and thus, not mach
+ < nlightnfotis> is the source code for it available? Can I read it
+ somewhere?
+ < braunr> the implied answer being: no, mach isn't a solid base for the
+ hurd considering your definition
+ < braunr> http://git.sceen.net/rbraun/x15.git/
+ < nlightnfotis> thanks. for that. So it's definite that mach won't stay for
+ long as the Hurd's base, right?
+ < braunr> it will, for long
+ < braunr> my opinion is that it needs to be replaced
+ < nlightnfotis> is it possible that it (slowly) gets rearchitected into
+ what's being considered a second generation microkernel, or is it
+ hopeless?
+ < braunr> it would require a new interface
+ < braunr> you can consider x15 to be a modern mach, with that new interface
+ < braunr> from a high level view, it's very similar (it's a hybrid, with
+ both scheduling and virtual memory management in the kernel)
+ < braunr> ipc changes a lot
+
+
+## IRC, freenode, #hurd, 2013-09-23
+
+ <braunr> for those of us interested in x15 and scalability in general:
+ http://darnassus.sceen.net/~rbraun/radixvm_scalable_address_spaces_for_multithreaded_applications.pdf
+ <braunr> finally an implementation allowing memory mapping to occur
+ concurrently
+ <braunr> (which is another contention issue when using mach-like ipc, which
+ often do need to allocate/release virtual memory)
diff --git a/microkernel/mach/documentation.mdwn b/microkernel/mach/documentation.mdwn
index cc880ab6..61e3469b 100644
--- a/microkernel/mach/documentation.mdwn
+++ b/microkernel/mach/documentation.mdwn
@@ -1,5 +1,5 @@
[[!meta copyright="Copyright © 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
-2010 Free Software Foundation, Inc."]]
+2010, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -47,3 +47,14 @@ License|/fdl]]."]]"""]]
- [An IO System for Mach](http://shakthimaan.com/downloads/hurd/An%20IO%20System%20for%20Mach.pdf)
- [A Programmers' Guide to Mach System Call](http://shakthimaan.com/downloads/hurd/A.Programmers.Guide.to.the.Mach.System.Calls.pdf)
+
+
+# IRC, freenode, #hurd, 2013-09-15
+
+ <teythoon> braunr: btw, are there multiple kernel threads in gnumach?
+ <teythoon> and is it safe to do a synchronous rpc call to a userspace
+ server?
+ <braunr> teythoon: there are yes, but few
+ <braunr> teythoon: the main (perhaps only) kernel thread is the page daemon
+ <braunr> and no, it's not safe to do synchronous calls to userspace
+ <braunr> except to the default pager
diff --git a/microkernel/mach/gnumach/debugging.mdwn b/microkernel/mach/gnumach/debugging.mdwn
index 71e92459..7e7cfb4e 100644
--- a/microkernel/mach/gnumach/debugging.mdwn
+++ b/microkernel/mach/gnumach/debugging.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2007, 2008, 2009, 2011, 2012 Free Software
+[[!meta copyright="Copyright © 2007, 2008, 2009, 2011, 2012, 2013 Free Software
Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
@@ -75,6 +75,9 @@ When you're [[running_a_system_in_QEMU|hurd/running/qemu]] you can directly
kernel](http://www.nongnu.org/qemu/qemu-doc.html#SEC48).
+## [[open_issues/debugging_gnumach_startup_qemu_gdb]]
+
+
# Code Inside the Kernel
Alternatively you can use an approach like this one: add the following code
diff --git a/microkernel/mach/gnumach/hardware_compatibility_list.mdwn b/microkernel/mach/gnumach/hardware_compatibility_list.mdwn
index 587178e9..32e712c9 100644
--- a/microkernel/mach/gnumach/hardware_compatibility_list.mdwn
+++ b/microkernel/mach/gnumach/hardware_compatibility_list.mdwn
@@ -105,6 +105,11 @@ These boards are known to work. Gnumach/Hurd has been installed and run on these
* VIA EPIA-M Mini-ITX motherboard with VIA Nehemiah C3 1Ghz processor. Onboard NIC (VIA Rhine) works good.
* Compaq Deskpro ENS, Pentium3 (666 MHz upgraded to 1 GHz), Intel i815 chipset, chipset integrated NIC (detected twice, but works fine with eth0; trying to access eth1 confuses the driver and makes the system unusable), Matrox Mystique 220 (PCI) graphics card. Also works with rtl8029 (NE2000 PCI) NIC when onboard NIC disabled in BIOS setup.
* Abit BX6 Rev. 2.0 with Celeron 400, after disabling "memory hole at 15MB" option in BIOS setup. (Otherwise, Mach detects only 15MiB of RAM, making Hurd run *extremely* slow and instable.) Should also work with PentiumII or Pentium3.
+* IRC, freenode, #hurd, 2013-08-26:
+
+ < stargater> does anyone have gnu/hurd running on real hw ?
+ < youpi> my latitude e6420 laptop, for instance
+
# User Failure Reports
diff --git a/microkernel/mach/gnumach/interface/syscall/mach_print.mdwn b/microkernel/mach/gnumach/interface/syscall/mach_print.mdwn
index ca52dca5..a169e92e 100644
--- a/microkernel/mach/gnumach/interface/syscall/mach_print.mdwn
+++ b/microkernel/mach/gnumach/interface/syscall/mach_print.mdwn
@@ -59,3 +59,32 @@ License|/fdl]]."]]"""]]
it
[[Makefile]], [[mach_print.S]], [[main.c]].
+
+
+## IRC, freenode, #hurd, 2013-07-01
+
+ <youpi> braunr: btw, we are missing the symbol in mach/Versions
+ <braunr> youpi: what symbol ?
+ <youpi> so the libc-provided RPC stub is not available
+ <youpi> mach_printf
+ <youpi> -f
+ <braunr> it's a system call
+ <braunr> not exported
+ <youpi> s/RPC/system call/
+ <braunr> that's expected
+ <youpi> libc does provide stubs for system calls too
+ <braunr> yes but not for this one
+ <youpi> I don't see why we wouldn't want to include it
+ <youpi> ?! it does
+ <braunr> it's temporary
+ <braunr> oh
+ <braunr> there must be automatic parsing during build
+ <youpi> sure
+ <braunr> nice
+
+ <braunr> youpi: if we're going to make this system call exported by glibc,
+ i should change its interface first
+ <braunr> it was meant as a very quick-and-dirty hack and directly accesses
+ the caller's address space without going through a special copy-from-user
+ function
+ <braunr> not very portable
diff --git a/microkernel/mach/gnumach/memory_management.mdwn b/microkernel/mach/gnumach/memory_management.mdwn
index 4e237269..477f0a18 100644
--- a/microkernel/mach/gnumach/memory_management.mdwn
+++ b/microkernel/mach/gnumach/memory_management.mdwn
@@ -188,3 +188,18 @@ License|/fdl]]."]]"""]]
patch
<braunr> (more kernel memory, thus more physical memory - up to 1.8 GiB -
but then, less user memory)
+
+
+# IRC, freenode, #hurd, 2013-06-06
+
+ <nlightnfotis> braunr: quick question, what memory allocation algorithms
+ does the Mach use? I know it uses slab allocation, so I can guess buddy
+ allocators too?
+ <braunr> no
+ <braunr> slab allocator for kernel memory (allocation of buffers used by
+ the kernel itself)
+ <braunr> a simple freelist for physical pages
+ <braunr> and a custom allocator based on a red-black tree, a linked list
+ and a hint for virtual memory
+ <braunr> (which is practically the same in all BSD variants)
+ <braunr> and linux does something very close too
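+
+The "simple freelist for physical pages" mentioned above can be pictured as a
+singly linked list of free page frames with constant-time push and pop. A
+minimal sketch, not the actual gnumach code; locking and the real gnumach
+structures are omitted.
+
+    #include <stddef.h>
+
+    struct vm_page {
+        struct vm_page *next; /* link used while the page is free */
+        /* ... other per-page state ... */
+    };
+
+    static struct vm_page *page_freelist;
+
+    /* Release a page frame: O(1) push onto the freelist. */
+    void page_free(struct vm_page *page)
+    {
+        page->next = page_freelist;
+        page_freelist = page;
+    }
+
+    /* Grab a free page frame: O(1) pop, NULL when none are left. */
+    struct vm_page *page_alloc(void)
+    {
+        struct vm_page *page = page_freelist;
+
+        if (page != NULL)
+            page_freelist = page->next;
+
+        return page;
+    }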
diff --git a/microkernel/mach/gnumach/ports.mdwn b/microkernel/mach/gnumach/ports.mdwn
index e7fdb446..2d9bc311 100644
--- a/microkernel/mach/gnumach/ports.mdwn
+++ b/microkernel/mach/gnumach/ports.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2007, 2008, 2009, 2011, 2012 Free Software
+[[!meta copyright="Copyright © 2007, 2008, 2009, 2011, 2012, 2013 Free Software
Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
@@ -27,3 +27,8 @@ License|/fdl]]."]]"""]]
* MIPS. Status completely unknown.
* [[open_issues/Mach_on_Top_of_POSIX]]. Status unknown.
+
+When starting a port for a new architecture, it might make sense to first
+target a [[!wikipedia desc="paravirtualized" Paravirtualization]] environment
+that already abstracts away some of the quirks of the different hardware
+implementations.
diff --git a/microkernel/mach/history.mdwn b/microkernel/mach/history.mdwn
index 776bb1d7..c22ea739 100644
--- a/microkernel/mach/history.mdwn
+++ b/microkernel/mach/history.mdwn
@@ -78,3 +78,137 @@ IRC, freenode, #hurd, 2012-08-29:
https://developer.apple.com/library/mac/#documentation/Darwin/Conceptual/KernelProgramming/About/About.html
<pavlx> can't be anymore
http://developer.apple.com/techpubs/macosx/Darwin/General/KernelProgramming/About/index.html
+
+IRC, freenode, #hurd, 2013-07-03:
+
+ *** natsukao (~natsukao@dynamic-adsl-94-37-184-109.clienti.tiscali.it) has
+ joined channel #hurd
+ <natsukao> hi
+ <natsukao> on 2012-08-29: i wrote a part of messages that then were posted
+ on http://www.gnu.org/software/hurd/microkernel/mach/history.html
+ <natsukao> i am sorry to inform you that apple computer cuèertino.inc, has
+ moved the URL: https://ssl.apple.com/science/profiles/cornell
+ <natsukao> and i have not found nothing on the source code of that page,
+ <natsukao> i used lftp without any success
+ <natsukao> and then wget, nothing to do
+ <natsukao> i have not found a copy cache of
+ https://ssl.apple.com/science/profiles/cornell
+ <natsukao> next time we save the documents and we provide to do our
+ archive/s
+ <natsukao> so that will be always available the infos
+ *** natsukao (~natsukao@dynamic-adsl-94-37-184-109.clienti.tiscali.it) is
+ now known as pavlx
+ <pavlx> happy hacking !!!!
+ <pavlx> "paolo del bene" <W3C_Freedom@riseup.net>
+ <pavlx> p.s: i'll turn back as soon as possible
+
+ <pavlx> i found the page of Darwin History, removed from apple compter
+ cupertino.inc
+ <pavlx> "Cached_ http___developer.apple.com_darwin_history.html"
+ <pavlx> the page http://developer.apple.com/darwin/history.html was moved
+ and now is available on:
+ <pavlx>
+ http://www.google.it/url?q=http://www.macmark.de/files/darwin_evolution.pdf%3FPHPSESSID%3D8b8683a81445f69d510734baa13aabfc&sa=U&ei=wMzTUb-NBIeFOOL4gNgE&ved=0CCQQFjAD&usg=AFQjCNFlLwC24nB5t14VUmedK4EmeE7Ohw
+ <pavlx> or simply: http://www.macmark.de/files/darwin_evolution.pdf
+ <pavlx> slides on: "Travel - Computer Science and Software Engineering"
+ <pavlx> www.csse.uwa.edu.au/~peterj/personal/PDFs/WWDC2004-6UP.pdf
+ <pavlx> about apple computer cupertino.inc, but there are many interesting
+ news
+ <teythoon> pavlx: uh, lot's of marketing noise from apple >,<
+ <pavlx> i found better material just now:
+ http://www.pcs.cnu.edu/~mzhang/CPSC450_550/2003CaseStudy/Mach_Presentation_DavidRamsey.ppt
+ <pavlx> teythoon, sorry, i turn back to sleep, see you later, paolo
+ W3C_Freedom@riseup.net
+ <pavlx> i'll charge of that page only things dedicated to GNU/HURD, but
+ slides are not mine, i found on internet
+    <teythoon> pavlx: sure, I didn't mean to offend you in any way
+
+IRC, freenode, #hurd, 2013-07-04:
+
+ <pavlx> there are few problems:
+ <pavlx> http://www.gnu.org/software/hurd/microkernel/mach/history.html
+ <pavlx> on the page GrantBow wrote: Apple's Macintosh OSX (OS 10.x) is
+ based on Darwin. "Darwin uses a monolithic kernel based on ?FreeBSD 4.4
+ and the OSF/mk Mach 3." Darwin also has a Kernel Programming Book.
+ <pavlx> the link to Darwin was moved, is not anymore
+ http://www.apple.com/macosx/technologies/darwin.html
+ <pavlx> then it's not FreeBSD 4.4 but BSD
+ <pavlx> and the link to Kernel Programming was moved is not
+ http://developer.apple.com/techpubs/macosx/Darwin/General/KernelProgramming/About/index.html
+ but
+ https://developer.apple.com/library/mac/#documentation/Darwin/Conceptual/KernelProgramming/About/About.html
+ <pavlx> apple has moved the URL:
+ https://ssl.apple.com/science/profiles/cornell
+ <pavlx> apple has moved the URL:
+ http://www.apple.com/macosx/technologies/darwin.html
+ <pavlx> so on the website you can left few things about my old post:
+ <pavlx> from IRC, freenode, #hurd, 2012-08-29: needs to remove
+ <pavlx> http://dpaste.com/1286266/
+ <pavlx> the new one will be: http://pastebin.com/yMm8tcTN
+ IRC, freenode, #hurd, 2013-07-04:
+
+ <pavlx> was moved the page from apple.com about darwin kernel programming
+ as described on the https://www.gnu.org/software/hurd/microkernel/mach/history.html
+
+ <pavlx> the link to Kernel Programming:
+ https://developer.apple.com/library/mac/#documentation/Darwin/Conceptual/KernelProgramming/About/About.html
+ <pavlx> (anyway i searching with any key the things moved from apple)
+ <pavlx> about Darwin type http://apple.com/darwin
+ <pavlx> on the right side, towards the end of the website it says: Darwin
+ Technologies
+ <pavlx> click on it, or copy the URL in an other tab of your own browser,
+ and read:
+ https://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/OSX_Technology_Overview/SystemTechnology/SystemTechnology.html
+ <pavlx> and something is related to Darwin
+ <pavlx> and again : http://pastebin.com/DHkJDxy8
+ # Mac OS X Server
+
+ ... This kernel, known as Darwin, provides a stable, high-performance platform
+ for developing groundbreaking applications and system technologies. ...
+ http://www.apple.com/server/docs/MacOSX_Server_TO_300195.pdf
+
+ # Mac OS X Server Command-Line Administration
+
+ Page 1. Mac OS X Server Command-Line Administration For Version 10.3
+ http://www.apple.com/server/docs/Command_Line.pdf
+
+ # Press Info - Apple “Open Sources” Rendezvous
+
+ ... Rendezvous is part of a broader open source release today from Apple at
+ http://developer.apple.com/darwin which includes the Darwin 6.0.1 ...
+ http://www.apple.com/pr/library/2002/09/25Apple-Open-Sources-Rendezvous.html
+
+ # Press Info - Apple Releases Darwin 1.0 Open Source
+
+ ... Apple Releases Darwin 1.0 Open Source. New ... modules. Darwin 1.0 gives
+ developers access to essential Mac OS X source code. ...
+ http://www.apple.com/pr/library/2000/04/05Apple-Releases-Darwin-1-0-Open-Source.html
+
+ # Press Info - Apple's Mac OS X to Ship on March 24
+
+ ... Mac OS X is built upon an incredibly stable, open source, UNIX based
+ foundation called Darwin and features true memory protection, preemptive ...
+ http://www.apple.com/pr/library/2001/01/09Apples-Mac-OS-X-to-Ship-on-March-24.html
+
+ # Press Info - Mac OS X “Gold Master” Released To ...
+
+ ... Mac OS X is built upon an incredibly stable, open source, UNIX
+ basedfoundation called Darwin and features true memory protection ...
+ http://www.apple.com/pr/library/2001/03/07Mac-OS-X-Gold-Master-Released-To-Manufacturing.html
+
+ * Press Info - Apple Announces Mac OS X “Jaguar” ...
+
+ ... As an active member of the Open Source community, Apple has distributed
+ Open Directory technology through the Darwin Open Source Project. ...
+ http://www.apple.com/pr/library/2002/07/17Apple-Announces-Mac-OS-X-Jaguar-Server-Worlds-Easiest-to-Manage-UNIX-Based-Server-Software.html
+ <pavlx> and:
+ http://lists.apple.com/archives/darwinos-users/2005/Apr/msg00021.html
+ <youpi> pavlx: it's hard to follow the changes you are talking
+ about. Perhaps you could simply edit these wiki pages?
+ <pavlx> anyway i am saying to you that i found a mailing list where are
+ availables the sources codes of darwin ppc-801 and x86
+ <pavlx> and as last thing mac os x 10.4
+ <braunr> pavlx: what's all this about ?
+ <pavlx> i am sorry, i did changes on the wiki
+ <pavlx> but after the preview and after to have saved, it show again the
+ old chat of 2012
diff --git a/microkernel/mach/message/msgh_id.mdwn b/microkernel/mach/message/msgh_id.mdwn
index ea52904a..799ed5cc 100644
--- a/microkernel/mach/message/msgh_id.mdwn
+++ b/microkernel/mach/message/msgh_id.mdwn
@@ -13,6 +13,8 @@ License|/fdl]]."]]"""]]
Every [[message]] has an ID field, which is defined in the [[RPC]] `*.defs`
files.
+[[!toc]]
+
# IRC, freenode, #hurd, 2012-07-12
@@ -281,3 +283,25 @@ files.
<youpi> then submit to the list for review
<braunr> hm ok
<braunr> youpi: ok, next time, i'll commit such changes directly
+
+
+# Subsystems
+
+## IRC, freenode, #hurd, 2013-09-03
+
+ <teythoon> anything I need to be aware of if I want to add a new subsystem?
+ <teythoon> is there a convention for choosing the subsystem id?
+ <braunr> a subsystem takes 200 IDs
+ <braunr> grep other subsystems in mach and the hurd to avoid collisions of
+ course
+ <teythoon> yes
+ <teythoon> i know that ;)
+ <braunr> :)
+ <teythoon> i've noticed the _notify subsystems being x+500, should I follow
+ that?
+ <pinotree> 100 for rpc + 100 for their replies?
+ <braunr> teythoon: yes
+ <braunr> pinotree: yes
+ <teythoon> ok
+ <teythoon> we should really work on mig...
+ <braunr> ... :)
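+
+A concrete illustration of the numbering convention discussed above, with a
+purely hypothetical subsystem and a made-up base of 33000: request IDs start
+at the subsystem base, the matching reply IDs sit at base + 100, and a
+companion notification subsystem is conventionally placed at base + 500.
+
+    /* Hypothetical "foo" subsystem occupying the 200-ID block
+       [33000, 33200); roughly the msgh_id values MIG would generate.  */
+    #define FOO_SUBSYSTEM_BASE        33000
+
+    /* Requests: base + routine index.  */
+    #define FOO_FROBNICATE_REQUEST    (FOO_SUBSYSTEM_BASE + 0)
+    #define FOO_TWIDDLE_REQUEST       (FOO_SUBSYSTEM_BASE + 1)
+
+    /* Replies: request ID + 100.  */
+    #define FOO_FROBNICATE_REPLY      (FOO_FROBNICATE_REQUEST + 100)
+    #define FOO_TWIDDLE_REPLY         (FOO_TWIDDLE_REQUEST + 100)
+
+    /* A companion foo_notify subsystem would sit at base + 500.  */
+    #define FOO_NOTIFY_SUBSYSTEM_BASE (FOO_SUBSYSTEM_BASE + 500)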
diff --git a/microkernel/mach/mig.mdwn b/microkernel/mach/mig.mdwn
index 331b3bf4..f8046cb2 100644
--- a/microkernel/mach/mig.mdwn
+++ b/microkernel/mach/mig.mdwn
@@ -1,5 +1,5 @@
-[[!meta copyright="Copyright © 2001, 2002, 2003, 2006, 2007, 2008, 2010 Free
-Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2001, 2002, 2003, 2006, 2007, 2008, 2010, 2013
+Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -22,9 +22,10 @@ wait for a result on a newly created [[reply port|port]], decode return
arguments from the reply message (*demarshalling*, or *unmarshalling*) and pass
them to the client program. Similar actions are provided in the skeletons that
are linked to server programs.
-
MIG allows very precise semantics to be specified about what the arguments are
and how to be passed.
+It has its problems with
+[[structured_data|open_issues/mig_portable_rpc_declarations]], however.
* [[Documentation]]
diff --git a/microkernel/mach/mig/documentation.mdwn b/microkernel/mach/mig/documentation.mdwn
index 7d4f1eca..e6bd1bb9 100644
--- a/microkernel/mach/mig/documentation.mdwn
+++ b/microkernel/mach/mig/documentation.mdwn
@@ -1,5 +1,5 @@
-[[!meta copyright="Copyright © 2002, 2003, 2005, 2007, 2008, 2009, 2010 Free
-Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2002, 2003, 2005, 2007, 2008, 2009, 2010, 2013
+Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -82,3 +82,20 @@ pp. 67--77."
* [[ServerCopy]]
* MIG *in action*: [[hurd/io_path]].
+
+
+## IRC, freenode, #hurd, 2013-09-04
+
+[[!tag open_issue_documentation open_issue_mig]]
+
+ <teythoon> btw, I just realized that mig mashes two very different things
+ together, namely the serialization/parsing and the message
+ sending/receiving
+ <braunr> yes
+ <teythoon> I'd prefer it if that were separated
+ <braunr> me too
+ <braunr> that's why i want x15 to have a bare messaging interface .. :)
+ <teythoon> \o/
+ <braunr> simple (but optimized) scatter-gather
+ <braunr> it makes sense for mig since mach messages do include
+ serialization metadata such as types
diff --git a/news/2008-09-11.mdwn b/news/2008-09-11.mdwn
index 0765a269..d5aa7811 100644
--- a/news/2008-09-11.mdwn
+++ b/news/2008-09-11.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2008, 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2008, 2011, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -11,5 +12,6 @@ License|/fdl]]."]]"""]]
[[!meta date="2008-09-11"]]
All five students who worked on the Hurd during the **Google Summer of Code 2008** succeeded
-in their projects. For more information please see [[the_community/gsoc_page|community/gsoc]].
+in their projects.
+For more information please see [[2008 GSoC page|community/gsoc/2008]].
**Congratulations to both students and mentors!**
diff --git a/open_issues/64-bit_port.mdwn b/open_issues/64-bit_port.mdwn
index 66da44b9..b0c95612 100644
--- a/open_issues/64-bit_port.mdwn
+++ b/open_issues/64-bit_port.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2011, 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2012, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -137,3 +138,20 @@ See also [[microkernel/mach/gnumach/memory_management]].
<braunr> hm actually no, it would require mcmodel=large
<braunr> hum, that's stupid, we can make the kernel run at -2g, and use 3g
up to the sign extension hole for the kernel map
+
+
+# IRC, freenode, #hurd, 2013-07-02
+
+In context of [[mondriaan_memory_protection]].
+
+ <xscript> BTW, it's not like I have an infinite amount of time for this,
+ but having 64-bit support would be valuable for me, so I might contribute
+ that back if it's not a too monumental task
+ <xscript> I saw some discussions about 32bit apps on top of 64bit mach, but
+ I'd like a full 64bit system
+ <xscript> any clues?
+ <xscript> I suppose the compiler support is all there already
+ <xscript> is MIG (and mach) the only piece missing?
+ <braunr> the problem is the interfaces themselves
+ <braunr> type widths
+ <braunr> as passed between userspace and kernel
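+
+A small, self-contained illustration of the kind of width problem meant here
+(not actual Hurd interface code): RPC messages have a fixed wire layout, so a
+field that is 4 bytes wide on i386 cannot silently become 8 bytes wide on
+x86_64 without updating the interface definition on both sides.
+
+    #include <stdio.h>
+
+    /* Illustrative request layout: on i386 both fields are 32 bits, so
+       kernel and user agree on an 8-byte message.  Recompiled for x86_64,
+       `size' grows to 64 bits and the two sides no longer agree unless
+       the interface itself is revised.  */
+    struct example_request
+    {
+      int           fd;
+      unsigned long size;
+    };
+
+    int
+    main (void)
+    {
+      printf ("sizeof (unsigned long) = %zu\n", sizeof (unsigned long));
+      printf ("sizeof (struct example_request) = %zu\n",
+              sizeof (struct example_request));
+      return 0;
+    }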
diff --git a/open_issues/anatomy_of_a_hurd_system.mdwn b/open_issues/anatomy_of_a_hurd_system.mdwn
index 677e4625..ba72b00f 100644
--- a/open_issues/anatomy_of_a_hurd_system.mdwn
+++ b/open_issues/anatomy_of_a_hurd_system.mdwn
@@ -323,7 +323,9 @@ Actually, the Hurd has never used an M:N model. Both libthreads (cthreads) and l
<braunr> etc..
-# IRC, freenode, #hurd, 2012-12-06
+# Service Directory
+
+## IRC, freenode, #hurd, 2012-12-06
<spiderweb> what is the #1 feature that distinguished hurd from other
operating systems. the concept of translators. (will read more when I get
@@ -333,6 +335,132 @@ Actually, the Hurd has never used an M:N model. Both libthreads (cthreads) and l
<braunr> and the VFS permissions to control access to those services
+## IRC, freenode, #hurd, 2013-05-23
+
+    <gnu_srs> Hi, is there any efficient way to control which backend
+ translators are called via RPC with a user space program?
+ <gnu_srs> Take for example io_stat: S_io_stat is defined in boot/boot.c,
+ pfinet/io-ops.c and pflocal/io.c
+ <gnu_srs> And the we have libdiskfs/io-stat.c:diskfs_S_io_stat,
+ libnetfs/io-stat.c:netfs_S_io_stat, libtreefs/s-io.c:treefs_S_io_stat,
+ libtrivfs/io-stat.c:trivfs_S_io_stat
+ <gnu_srs> How are they related?
+ <braunr> gnu_srs: it depends on the server (translator) managing the files
+ (nodes) you're accessing
+ <braunr> so use fsysopts to know the server, and see what this server uses
+ <gnu_srs> fsysopts /hurd/pfinet and fsysopts /hurd/pflocal gives the same
+ answer: ext2fs --writable --no-inherit-dir-group --store-type=typed
+ device:hd0s1
+ <braunr> of course
+ <braunr> the binaries are regular files
+ <braunr> see /servers/socket/1 and /servers/socket/2 instead
+ <braunr> which are the nodes representing the *service*
+ <braunr> again, the hurd uses the file system as a service directory
+ <braunr> this usage of the file system is at the core of the hurd design
+ <braunr> files are not mere files, they're service names
+ <braunr> it happens that, for most files, the service behind them is the
+ same as for regular files
+ <braunr> gnu_srs: this *must* be obvious for you to do any tricky work on
+ the hurd
+
+ <gnu_srs> Anyway, if I create a test program calling io_stat I assume
+ S_io_stat in pflocal is called.
+ <gnu_srs> How to make the program call S_io_stat in pfinet instead?
+ <braunr> create a socket managed by pfinet
+ <braunr> i.e. an inet or inet6 socket
+ <braunr> you can't assume io_stat is serviced by pflocal
+ <braunr> only stats on unix sockets of pipes will be
+ <braunr> or*
+ <gnu_srs> thanks, what about the *_S_io_stat functions?
+ <braunr> what about them ?
+ <gnu_srs> How they fit into the picture, e.g. diskfs_io_stat?
+ <gnu_srs> *diskfs_S_io_stat
+ <braunr> gnu_srs: if you open a file managed by a server using libdiskfs,
+ e.g. ext2fs, that one will be called
+ <gnu_srs> Using the same user space call: io_stat, right?
+ <braunr> it's all userspace
+ <braunr> say rather, client-side
+ <braunr> the client calls the posix stat() function, which is implemented
+ by glibc, which converts it into a call to io_stat, and sends it to the
+ server managing the open file
+ <braunr> the io_stat can change depending on the server
+ <braunr> the remote io_stat implementation, i mean
+ <braunr> identify the server, and you will identify the actual
+ implementation
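+
+A small client-side illustration of the above (plain POSIX code, nothing
+Hurd-specific in the source): both calls go through glibc, which turns them
+into io_stat RPCs, but the first is served by the translator managing the
+file system node (e.g. ext2fs) and the second by pflocal, since it manages
+the socket.
+
+    #include <stdio.h>
+    #include <sys/socket.h>
+    #include <sys/stat.h>
+    #include <unistd.h>
+
+    int
+    main (void)
+    {
+      struct stat st;
+
+      /* Served by the file system translator managing "/".  */
+      if (stat ("/", &st) == 0)
+        printf ("/: inode %ld\n", (long) st.st_ino);
+
+      /* Served by pflocal, which manages PF_UNIX sockets.  */
+      int fd = socket (PF_UNIX, SOCK_STREAM, 0);
+      if (fd >= 0)
+        {
+          if (fstat (fd, &st) == 0)
+            printf ("unix socket: mode %o\n", (unsigned int) st.st_mode);
+          close (fd);
+        }
+
+      return 0;
+    }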
+
+
+## IRC, freenode, #hurd, 2013-06-30
+
+ <hacklu> hi, what is the replacer of netname_check_in?
+
+ <hacklu> I want to ask another question. in my opinion, the rpc is the
+ mach's way, and the translator is the hurd's way. so somebody want to
+ lookup a service, it should not need to ask the mach kernel know about
+ this query. the hurd will take the control.
+ <hacklu> am I right?
+ <braunr> no
+ <braunr> that's nonsense
+ <braunr> service lookups has never been in mach
+ <braunr> first mach based systems used a service directory, whereas the
+ hurd uses the file system for that
+ <braunr> you still need mach to communicate with either of those
+ <hacklu> how to understand the term of service directory here?
+ <braunr> a server everyone knows
+ <braunr> which gives references to other servers
+ <braunr> usually, depending on the name
+ <braunr> e.g. name_lookup("net") -> port right to network server
+ <hacklu> is that people use netname_check_in to register service in the
+ past? now used libtrivfs?
+ <braunr> i don't know about netname_check_in
+ <braunr> old mach (not gnumach) documentation might mention this service
+ directory
+ <braunr> libtrivfs doesn't have much to do with that
+ <braunr> on the hurd, the equivalent is the file system
+ <hacklu> maybe that is outdate, I just found that exist old doc, and old
+ code which can't be build.
+ <braunr> every process knows /
+ <braunr> the file system is the service directory
+ <braunr> nodes refer to services
+ <hacklu> so the file system is the nameserver, any new service should
+ register in it before other can use
+ <braunr> and the file system is distributed, so looking up a service may
+ require several queries
+ <braunr> setting a translator is exactly that, registering a program to
+ service requests on a node
+ <braunr> the file system isn't one server though
+ <braunr> programs all know about /, but then, lookups are recursive
+ <braunr> e.g. if you have / and /home, and are looking for
+ /home/hacklu/.profile, you ask / which tells you about /home, and /home
+ will give you a right to /home/hacklu/.profile
+ <hacklu> even in the past, the mach don't provide name register service,
+ there must be an other server to provide this service?
+ <braunr> yes
+ <braunr> what's nonsense in your sentence is comparing RPCs and translators
+ <braunr> translators are merely servers attached to the file system, using
+ RPCs to communicate with the rest of the system
+ <hacklu> I know yet, the two just one thing.
+ <braunr> no
+ <braunr> two things :p
+ <braunr> completely different and unrelated except for one using the other
+ <hacklu> ah, just one used aonther one.
+ <hacklu> is exist anyway to anounce service except settrans with file node?
+ <braunr> more or less
+ <braunr> tasks can have special ports
+ <braunr> that's how one task knows about / for example
+ <braunr> at task creation, a right to / is inserted in the new task
+ <hacklu> I think this is also a file node way.
+ <braunr> no
+ <braunr> if i'm right, auth is referenced the same way
+ <braunr> and there is no node for auth
+ <hacklu> how the user get the port of auth with node?
+ <braunr> it's given when a task is created
+ <hacklu> pre-set in the creation of one task?
+    <braunr> i'm uncomfortable with "pre-set"
+ <braunr> inserted at creation time
+ <braunr> auth is started very early
+ <braunr> then tasks are given a reference to it
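+
+To make the "inserted at creation time" point concrete, a tiny example using
+the Hurd-specific helpers from glibc's <hurd.h> (getcrdir and getauth are
+assumed to be available, as in current glibc); both simply hand back port
+rights the task received when it was created, without any file system lookup:
+
+    #include <hurd.h>
+    #include <stdio.h>
+
+    int
+    main (void)
+    {
+      /* Rights inserted into this task at creation time; no node in the
+         file system is consulted to obtain them.  */
+      file_t root = getcrdir ();   /* the root directory server */
+      auth_t auth = getauth ();    /* the auth server */
+
+      printf ("root dir port: %u\n", (unsigned int) root);
+      printf ("auth port:     %u\n", (unsigned int) auth);
+      return 0;
+    }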
+
+
# IRC, freenode, #hurd, 2012-12-10
<spiderweb> I want to work on hurd, but I think I'm going to start with
@@ -380,3 +508,298 @@ Actually, the Hurd has never used an M:N model. Both libthreads (cthreads) and l
<braunr> if you're looking for how to do it for a non-translator
application, the answer is probably somewhere in glibc
<braunr> _hurd_startup i'd guess
+
+
+# IRC, freenode, #hurd, 2013-06-15
+
+ <damo22> ive been reading a little about exokernels or unikernels, and i
+ was wondering if it might be relevant to the GNU/hurd design. I'm not
+ too familiar with hurd terminology so forgive me. what if every
+ privileged service was compiled as its own mini "kernel" that handled (a)
+ any hardware related to that service (b) any device nodes exposed by that
+ service etc...
+ <braunr> yes but not really that way
+ <damo22> under the current hurd model of the operating system, how would
+ you talk to hardware that required specific timings like sound hardware?
+ <braunr> through mapped memory
+ <damo22> is there such a thing as an interrupt request in hurd?
+ <braunr> obviously
+ <damo22> ok
+ <damo22> is there any documentation i can read that involves a driver that
+ uses irqs for hurd?
+ <braunr> you can read the netdde code
+ <braunr> dde being another project, there may be documentation about it
+ <braunr> somewhere else
+ <braunr> i don't know where
+ <damo22> thanks
+ <damo22> i read a little about dde, apparently it reuses existing code from
+ linux or bsd by reimplementing parts of the old kernel like an api or
+ something
+ <braunr> yes
+ <damo22> it must translate these system calls into ipc or something
+ <damo22> then mach handles it?
+ <braunr> exactly
+ <braunr> that's why i say it's not the exokernel way of doing things
+ <damo22> ok
+ <damo22> so does every low level hardware access go through mach?'
+ <braunr> yes
+ <braunr> well no
+ <braunr> interrupts do
+ <braunr> ports (on x86)
+ <braunr> everything else should be doable through mapped memory
+ <damo22> seems surprising that the code for it is so small
+ <braunr> 1/ why surprising ? and 2/ "so small" ?
+ <damo22> its like the core of the OS, and yet its tiny compared to say the
+ linux kernel
+    <braunr> it's a microkernel
+ <braunr> well, rather an hybrid
+ <braunr> the size of the equivalent code in linux is about the same
+ <damo22> ok
+ <damo22> with the model that privileged instructions get moved to
+ userspace, how does one draw the line between what is OS and what is user
+ code
+ <braunr> privileged instructions remain in the kernel
+ <braunr> that's one of the few responsibilities of the kernel
+ <damo22> i see, so it is an illusion that the user has privilege in a sense
+ <braunr> hum no
+ <braunr> or, define "illusion"
+ <damo22> well the user can suddenly do things never imaginable in linux
+ <damo22> that would have required sudo
+ <braunr> yes
+ <braunr> well, they're not unimaginable on linux
+ <braunr> it's just not how it's meant to work
+ <damo22> :)
+ <braunr> and why things like fuse are so slow
+ <braunr> i still don't get "i see, so it is an illusion that the user has
+ privilege in a sense"
+ <damo22> because the user doesnt actually have the elevated privilege its
+ the server thing (translator)?
+ <braunr> it does
+ <braunr> not at the hardware level, but at the system level
+ <braunr> not being able to do it directly doesn't mean you can't do it
+ <damo22> right
+ <braunr> it means you need indirections
+ <braunr> that's what the kernel provides
+ <damo22> so the user cant do stuff like outb 0x13, 0x1
+ <braunr> he can
+ <braunr> he also can on linux
+ <damo22> oh
+ <braunr> that's an x86 specifity though
+ <damo22> but the user would need hardware privilege to do that
+ <braunr> no
+ <damo22> or some kind of privilege
+ <braunr> there is a permission bitmap in the TSS that allows userspace to
+ directly access some ports
+ <braunr> but that's really x86 specific, again
+ <damo22> i was using it as an example
+ <damo22> i mean you wouldnt want userspace to directly access everything
+ <braunr> yes
+ <braunr> the only problem with that is dma reall
+ <braunr> y
+ <braunr> because dma usually access physical memory directly
+ <damo22> are you saying its good to let userspace access everything minus
+ dma?
+ <braunr> otherwise you can just centralize permissions in one place (the
+ kernel or an I/O server for example)
+ <braunr> no
+ <braunr> you don't let userspace access everything
+ <damo22> ah
+ <damo22> yes
+ <braunr> userspace asks for permission to access one specific part (a
+ memory range through mapping)
+ <braunr> and can't access the rest (except through dma)
+ <damo22> except through dma?? doesnt that pose a large security threat?
+ <braunr> no
+ <braunr> you don't give away dma access to anyone
+ <braunr> only drivers
+ <damo22> ahh
+ <braunr> and drivers are normally privileged applications anyway
+ <damo22> so a driver runs in userspace?
+ <braunr> so the only effect is that bugs can affect other address spaces
+ indirectly
+ <braunr> netdde does
+ <damo22> interesting
+ <braunr> and they all should but that's not the case for historical reasons
+ <damo22> i want to port ALSA to hurd userspace :D
+ <braunr> that's not so simple unfortunately
+ <braunr> one of the reasons it's hard is that pci access needs arbitration
+ <braunr> and we don't have that yet
+ <damo22> i imagine that would be difficult
+ <braunr> yes
+ <braunr> also we're not sure we want alsa
+ <braunr> alsa drivers, maybe, but probably not the interface itself
+ <damo22> its tangled spaghetti
+ <damo22> but the guy who wrote JACK for audio hates OSS, and believes it is
+ rubbish due to the fact it tries to read and write to a pcm device node
+ like a filesystem with no care for timing
+ <braunr> i don't know audio well enough to tell you anything about that
+ <braunr> was that about oss3 or oss4 ?
+ <braunr> also, the hurd isn't a real time system
+ <braunr> so we don't really care about timings
+ <braunr> but with "good enough" latencies, it shouldn't be a problem
+ <damo22> but if the audio doesnt reach the sound card in time, you will get
+ a crackle or a pop or a pause in the signal
+ <braunr> yep
+ <braunr> it happens on linux too when the system gets some load
+    <damo22> some users find this unacceptable
+ <braunr> some users want real time systems
+ <braunr> using soft real time is usually plenty enough to "solve" this kind
+ of problems
+ <damo22> will hurd ever be a real time system?
+ <braunr> no idea
+ <youpi> if somebody works on it why not
+ <youpi> it's the same as linux
+ <braunr> it should certainly be simpler than on linux though
+ <damo22> hmm
+ <braunr> microkernels are well suited for real time because of the well
+ defined interfaces they provide and the small amount of code running in
+ kernel
+ <damo22> that sounds promising
+ <braunr> you usually need to add priority inheritance and take care of just
+ a few corner cases and that's all
+ <braunr> but as youpi said, it still requires work
+ <braunr> and nobody's working on it
+ <braunr> you may want to check l4 fiasco.oc though
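+
+As a sketch of the direct port access mentioned above, assuming glibc's i386
+<sys/io.h> interface (ioperm and outb) is available, which on the Hurd would
+be backed by exactly that I/O permission bitmap; this is an assumption about
+the environment, not a statement about what is currently shipped:
+
+    #include <stdio.h>
+    #include <sys/io.h>
+
+    int
+    main (void)
+    {
+      /* Ask for access to a single I/O port (0x80, the classic POST port);
+         this requires the appropriate privileges.  */
+      if (ioperm (0x80, 1, 1) != 0)
+        {
+          perror ("ioperm");
+          return 1;
+        }
+
+      /* Write a byte directly from user space, no system call involved.  */
+      outb (0x01, 0x80);
+      return 0;
+    }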
+
+
+# System Personality
+
+## IRC, freenode, #hurd, 2013-07-29
+
+ <teythoon> over the past few days I gained a new understanding of the Hurd
+ <braunr> teythoon: really ? :)
+ <tschwinge> teythoon: That it's a complex and distributed system? ;-)
+ <tschwinge> And at the same time a really simple one?
+ <tschwinge> ;-D
+ <teythoon> it's just a bunch of mach programs and some do communicate and
+ behave in a way a posix system would, but that is more a convention than
+ anything else
+ <teythoon> tschwinge: yes, kind of simple and complex :)
+ <braunr> the right terminology is "system personality"
+ <braunr> 11:03 < teythoon> over the past few days I gained a new
+ understanding of the Hurd
+ <braunr> teythoon: still no answer on that :)
+ <teythoon> braunr: ah, I spent lot's of time with the core servers and
+ early bootstrapping and now I gained the feeling that I've seen the Hurd
+ for what it really is for the first time
+
+
+# RPC Interfaces
+
+## IRC, freenode, #hurd, 2013-09-03
+
+ <rekado> I'm a little confused by the hurd and incubator git repos.
+ <rekado> DDE is only found in the dde branch in incubator, but not in the
+ hurd repo.
+ <rekado> Does this mean that DDE is not ready for master yet?
+ <braunr> yes
+ <rekado> If DDE is not yet used in the hurd (except in the dde branch in
+ the incubator repo), does pfinet use some custom glue code to use the
+ Linux drivers?
+ <braunr> this has nothing to do with pfinet
+ <braunr> pfinet is the networking stack, netdde are the networking drivers
+ <braunr> the interface between them doesn't change, whether drivers are in
+ kernel or not
+ <rekado> I see
+
+
+# IRC, freenode, #hurd, 2013-09-20
+
+ <giuscri> HI there, I have no previous knowledge about OS's. I'm trying to
+ undestand the structure of the Hurd and the comparison between, say,
+ Linux way of managing stuff ...
+ <giuscri> for instance, I read: "Unlike other popular kernel software, the
+ Hurd has an object-oriented structure that allows it to evolve without
+ compromising its design."
+ <giuscri> that means that while for adding feature to the Linux-kernel you
+ have to add some stuff `inside` a procedure, whilst in the Hurd kernel
+ you can just, in principle at least, add an object and making the kernel
+ using it?...
+ <giuscri> Am I making stuff too simple?
+ <giuscri> Thanks
+ <braunr> not exactly
+ <braunr> unix historically has a "file-oriented" structure
+ <braunr> the hurd allows servers to implement whatever type they want,
+ through the ability to create custom interfaces
+ <braunr> custom interfaces means custom calls, custom semantics, custom
+ methods on objects
+ <braunr> you're not restricted to the set of file interfaces (open, seek,
+ read, write, select, close, etc..) that unix normally provides
+ <giuscri> braunr: uhm ...some example?
+ <braunr> see processes for example
+ <braunr> see
+ http://darnassus.sceen.net/gitweb/savannah_mirror/hurd.git/tree/HEAD:/hurd
+ <braunr> this is the collection of interfaces the hurd provides
+ <braunr> most of them map to unix calls, because gnu aims at posix
+ compatibility too
+ <braunr> some are internal, like processes
+ <braunr> or authentication
+ <braunr> but most importantly, you're not restricted to that, you can add
+ your own interfaces
+ <braunr> on a unix, you'd need new system calls
+ <braunr> or worse, extending through the catch-all ioctl call
+ <giuscri> braunr: mhn ...sorry, not getting that.
+ <braunr> what part ?
+ <kilobug> ioctl has become such a mess :s
+ <giuscri> braunr: when you say that Unix is `file-oriented` you're
+ referring to the fact that sending/receiving data to/from the kernel is
+ designed like sending/receiving data to/from a file ...?
+ <braunr> not merely sending/receiving
+ <braunr> note how formatted your way of thinking is
+ <braunr> you directly think in terms of sending/receiving (i.e. read and
+ write)
+ <giuscri> braunr: (yes)
+ <braunr> that's why unix is file oriented, access to objects is done that
+ way
+ <braunr> on the hurd, the file interface is one interface
+ <braunr> there is nothing preventing you from implementing services with a
+ different interface
+ <braunr> as a real world example, people interested in low latency
+ profesionnal audio usually dislike send/recv
+ <braunr> see
+ http://lac.linuxaudio.org/2003/zkm/slides/paul_davis-jack/unix.html for
+ example
+ <kilobug> braunr: how big and messy ioctl has become is a good proof that
+ the Unix way, while powerful, does have its limits
+ <braunr> giuscri: keep in mind the main goal of the hurd is extensibility
+ without special privileges
+ <giuscri> braunr: privileges?
+ <braunr> root
+ <giuscri> braunr: what's wrong with privileges?
+ <braunr> they allow malicious/buggy stuff to happne
+ <braunr> and have dramatic effects
+ <giuscri> braunr: you're obviously *not* referring to the fact that once
+ one have the root privileges could change some critical-data
+ <giuscri> ?
+ <braunr> i'm referring to why privilege separation exists in the first
+ place
+ <braunr> if you have unprivileged users, that's because you don't want them
+ to mess things up
+ <braunr> on unix, extending the system requires privileges, giving those
+ who do it the ability to destroy everything
+ <giuscri> braunr: yes, I think the same
+ <braunr> the hurd is designed to allow unprivileged users to extend their
+ part of the system, and to some extent share that with other users
+ <braunr> although work still remains to completely achieve that
+ <giuscri> braunr: mhn ...that's the `server`-layer between the
+ single-application and kernel ?
+ <braunr> the multi-server based approach not only allows that, but
+ mitigates damage even when privileged servers misbehave
+ <braunr> one aspect of it yes
+ <braunr> but as i was just saying, even root servers can't mess things too
+ much
+ <braunr> for example, our old (sometimes buggy) networking stack can be
+ restarted when it behaves wrong
+ <braunr> the only side effect being some applications (ssh and exim come to
+ mind) which need to be restarted too because they don't expect the
+ network stack to be restarted
+ <giuscri> braunr: ...instead?
+ <braunr> ?
+ <kilobug> giuscri: on Linux, if the network stack crash/freezes, you don't
+ have any other option than rebooting the system - usually with a nice
+ "kernel pani"
+ <kilobug> giuscri: and you may even get filesystem corruption "for free" in
+ the bundle
+ <braunr> and hoping it didn't corrupt something important like file system
+ caches before being flushed
+ <giuscri> kilobug, braunr : mhn, ook
diff --git a/open_issues/arm_port.mdwn b/open_issues/arm_port.mdwn
index 8a2a037f..ebbad1a4 100644
--- a/open_issues/arm_port.mdwn
+++ b/open_issues/arm_port.mdwn
@@ -274,3 +274,55 @@ architecture.
duplicate efforts.
<braunr> little addition: it may have started, but we don't know about it
+
+# IRC, freenode, #hurd, 2013-09-18
+
+ <Hooligan0> as i understand ; on startup, vm_resident.c functions configure
+ the whole available memory ; but at this point the system does not split
+ space for kernel and space for future apps
+ <Hooligan0> when pages are tagged to be used by userspace ?
+ <braunr> Hooligan0: at page fault time
+ <braunr> the split is completely virtual, vm_resident deals with physical
+ memory only
+ <Hooligan0> braunr: do you think it's possible to change (at least)
+ pmap_steal_memory to mark somes pages as kernel-reserved ?
+ <braunr> why do you want to reserve memory ?
+ <braunr> and which memory ?
+ <Hooligan0> braunr: first because on my mmu i have two entry points ; so i
+ want to set kernel pages into a dedicated space that never change on
+ context switch (for best cache performance)
+ <Hooligan0> braunr: and second, because i want to use larger pages into
+ kernel (1MB) to reduce mmu work
+ <braunr> vm_resident isn't well suited for large pages :(
+ <braunr> i don't see the effect of context switch on kernel pages
+ <Hooligan0> at many times, context switch flush caches
+ <braunr> ah you want something like global pages on x86 ?
+ <Hooligan0> yes, something like
+ <braunr> how is it done on arm ?
+ <Hooligan0> virtual memory is split into two parts depending on msb bits
+ <Hooligan0> for example 3G/1G
+ <Hooligan0> MMU will use two pages tables depending on vaddr (hi-side or
+ low-side)
+ <braunr> hi is kernel, low is user ?
+ <Hooligan0> so, for the moment i've put mach at 0xC0000000 -> 0xFFFFFFFF ;
+ and want to use 0x00000000 -> 0xBFFFFFFF for userspace
+ <Hooligan0> yes
+ <braunr> ok, that's what is done for x86 too
+ <Hooligan0> 1MB pages for kernel ; and 4kB (or 64kB) pages for apps
+ <braunr> i suggest you give up the large page stuff
+ <braunr> well, you can use them for the direct physical mapping, but for
+ kernel objects, it's a waste
+ <braunr> or you can rewrite vm_resident to use something like a buddy
+ allocator but it's additional work
+ <Hooligan0> for the moment it's waste ; but with some littles changes this
+ allow only one level of allocation mapping ; -i think- it's better for
+ performances
+ <braunr> Hooligan0: it is, but not worth it
+ <Hooligan0> will you allow changes into vm_resident if i update i386 too ?
+ <braunr> Hooligan0: sure, as long as these are relevant and don't introduce
+ regressions
+ <Hooligan0> ok
+ <braunr> Hooligan0: i suggest you look at x15, since you may want to use it
+ as a template for your own changes
+ <braunr> as it was done for the slab allocator for example
+ <braunr> e.g. x15 already uses a buddy allocator for physical memory
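+
+A trivial sketch of the 3G/1G split being discussed, using the example
+addresses from the conversation (illustrative only, not taken from any
+existing port):
+
+    #include <stdbool.h>
+    #include <stdint.h>
+
+    /* User space below 0xC0000000, kernel mapped at 0xC0000000 and above.
+       On ARM the most significant bits of the virtual address select which
+       of the two translation table bases the MMU uses, so the kernel side
+       can stay in place across context switches.  */
+    #define KERNEL_VIRT_BASE ((uintptr_t) 0xC0000000)
+
+    static inline bool
+    addr_is_kernel (uintptr_t va)
+    {
+      return va >= KERNEL_VIRT_BASE;
+    }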
diff --git a/open_issues/binutils.mdwn b/open_issues/binutils.mdwn
index 641083a7..b19a81a4 100644
--- a/open_issues/binutils.mdwn
+++ b/open_issues/binutils.mdwn
@@ -39,8 +39,8 @@ git diff --patience --stat=$COLUMNS,$COLUMNS --patch --src-prefix=./ --dst-prefi
-->
-Last reviewed up to the [[Git mirror's e7e89a91affc920f0d422c56ca04867e49ac5ac1
-(2013-05-27) sources|source_repositories/binutils]].
+Last reviewed up to the [[Git mirror's d2a61dc33b01c56f5153c1ddea7b1fb8f304f20d
+(2013-06-18) sources|source_repositories/binutils]].
* Globally
@@ -128,11 +128,11 @@ Last reviewed up to the [[Git mirror's e7e89a91affc920f0d422c56ca04867e49ac5ac1
Here's a log of a binutils build run; this is from our [[Git
repository|source_repositories/binutils]]'s `tschwinge/Paul_Desmond` branch,
-commit e7e89a91affc920f0d422c56ca04867e49ac5ac1 (2013-05-27), run on
+commit d2a61dc33b01c56f5153c1ddea7b1fb8f304f20d (2013-06-18), run on
kepler.SCHWINGE and coulomb.SCHWINGE.
$ export LC_ALL=C
- $ ../Paul_Desmond/configure --prefix="$PWD".install --enable-gold --with-sysroot=/ SHELL=/bin/dash CC=gcc-4.7 CXX=g++-4.7 2>&1 | tee log_build
+ $ ../Paul_Desmond/configure --prefix="$PWD".install --enable-gold --with-sysroot=/ SHELL=/bin/dash CC=gcc-4.8 CXX=g++-4.8 2>&1 | tee log_build
[...]
$ make 2>&1 | tee log_build_
[...]
@@ -142,8 +142,8 @@ harmonized. Debian GCC (which is used in binutils' testsuite) likes to pass
`--sysroot=/` to `ld`, so we need to configure binutils with support for
sysroots.
-This takes up around 950 MiB, and needs roughly 13 min on kepler.SCHWINGE and
-45 min on coulomb.SCHWINGE.
+This takes up around 1100 MiB, and needs roughly 13 min on kepler.SCHWINGE and
+53 min on coulomb.SCHWINGE.
<!--
@@ -160,20 +160,13 @@ formats, and more emulation vectors.
$ toolchain/logs/process binutils build
- * gold GNU/Linux vs. GNU/Hurd
-
- -checking for glibc ifunc support... both
- +checking for glibc ifunc support... dyn
-
- Missing [[IFUNC]] support on GNU/Hurd.
-
# Install
$ make install 2>&1 | tee log_install
[...]
-This takes up around 160 MiB, and needs roughly 1 min on kepler.SCHWINGE and 3
+This takes up around 170 MiB, and needs roughly 1 min on kepler.SCHWINGE and 3
min on coulomb.SCHWINGE.
@@ -189,7 +182,7 @@ min on coulomb.SCHWINGE.
$ make -k check 2>&1 | tee log_test
[...]
-This needs roughly 6 min on kepler.SCHWINGE and 47 min on coulomb.SCHWINGE.
+This needs roughly 5 min on kepler.SCHWINGE and 37 min on coulomb.SCHWINGE.
## Analysis
@@ -220,7 +213,7 @@ This needs roughly 6 min on kepler.SCHWINGE and 47 min on coulomb.SCHWINGE.
symbol handling in glibc, needed for our external [[/libpthread]]. TODO:
document properly.
- * `FAIL: gas/i386/rept`
+ * `FAIL: gas/i386/rept` (intermittently)
Added in commit 06f1247c54126b9f1e6acb8ff8c7be35aec6f44c (2012-06-07) as
part of the fix for [[!sourceware_PR 14201]], renamed in commit
@@ -251,14 +244,3 @@ This needs roughly 6 min on kepler.SCHWINGE and 47 min on coulomb.SCHWINGE.
-PASS: tls_phdrs_script_test
+FAIL: tls_phdrs_script_test
-
- -PASS: ifuncmain1static
- -PASS: ifuncmain1picstatic
- -PASS: ifuncmain2static
- -PASS: ifuncmain2picstatic
- -PASS: ifuncmain4static
- -PASS: ifuncmain4picstatic
- -PASS: ifuncmain5static
- -PASS: ifuncmain5picstatic
- -PASS: ifuncmain7static
- -PASS: ifuncmain7picstatic
diff --git a/open_issues/boehm_gc.mdwn b/open_issues/boehm_gc.mdwn
index 7f860bba..623dcb83 100644
--- a/open_issues/boehm_gc.mdwn
+++ b/open_issues/boehm_gc.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2010, 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2010, 2012, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -433,3 +434,92 @@ restults of GNU/Linux and GNU/Hurd look very similar.
<civodul> pinotree: is it a Debian-specific change, or included upstream?
<pinotree> libgc using SIGUSR1/2? upstream
<civodul> ok
+
+
+### IRC, freenode, #hurd, 2013-09-03
+
+ <congzhang> braunr: when will libc malloc say memory corruption?
+ <braunr> congzhang: usually on free
+ <braunr> sometimes on alloc
+ <congzhang> and after one thread be created
+ <congzhang> I want to know why and how to find the source
+ <congzhang> does libgc work well on hurd?
+ <braunr> i don't think it does
+ <congzhang> so , why it can't?
+ <braunr> congzhang: what ?
+ <congzhang> libgc was not work on hurd
+ <pinotree> why?
+ <congzhang> I try porting dotgnu
+ <braunr> ah
+ <braunr> nested signal handling
+ <congzhang> one program always receive Abort signal
+ <pinotree> and why it should be a problem in libgc?
+ <congzhang> for malloc memory corruption
+ <braunr> libgc relies on this
+ <congzhang> yes
+ <congzhang> so, is there a workaround to make it work?
+ <braunr> show the error please
+ <congzhang> http://paste.debian.net/34416/
+ <pinotree> where's libgc?
+ <congzhang> i compile dotgnu with enable-gc
+ <pinotree> so?
+ <congzhang> I am not sure about it
+ <pinotree> so why did you say earlier that libgc doesn't work?
+ <congzhang> because after I see one thread was created notice by gdb, it
+ memory corruption
+ <pinotree> so what?
+ <congzhang> maybe gabage collection happen, and gc thread start
+ <pinotree> that's speculation
+ <pinotree> you cannot debug things speculating on code you don't know
+ <pinotree> less speculation and more in-deep debugging, please
+ * congzhang I try again, to check weather thread list changing
+ <congzhang> sorry for this
+ <braunr> it simply looks like a real memory corruption (an overflow)
+ <congzhang> maybe PATH related problem
+ <pinotree> PATH?
+ <congzhang> yes
+ <braunr> PATH_MAX
+ <braunr> but unlikely
+ <congzhang> csant do path traverse
+ <congzhang> I fond the macro
+ <congzhang> found
+ <congzhang> #if defined(__sun__) || defined(__BEOS__)
+ <congzhang> #define BROKEN_DIRENT 1
+ <congzhang> #endif
+ <congzhang> and so for hurd?
+ <pinotree> BROKEN_DIRENT doesn't say much about what it does
+ <WhiteKIBA> nope
+ <WhiteKIBA> whoops
+ <congzhang> it seems other port meet the trouble too
+ <pinotree> which trouble?
+ <congzhang> http://comments.gmane.org/gmane.comp.gnu.dotgnu.developer/3642
+ <congzhang> (gdb) ptype struct dirent
+ <congzhang> type = struct dirent {
+ <congzhang> __ino_t d_ino;
+ <congzhang> unsigned short d_reclen;
+ <congzhang> unsigned char d_type;
+ <congzhang> unsigned char d_namlen;
+ <congzhang> char d_name[1];
+ <congzhang> }
+ <congzhang>
+ <congzhang> d_name should be char[PATH_MAX]?
+ <congzhang> and
+ http://libjit-linear-scan-register-allocator.googlecode.com/svn/trunk/pnet/support/dir.c
+ <pinotree> no
+ <braunr> stop pasting that much
+ <_d3f> uhm PATH_MAX on the hurd?
+ <braunr> and stop saying nonsense
+ <congzhang> sorry, i think four line was not worth to pastbin
+ <pinotree> they are 8
+ <congzhang> never again
+ <braunr> just try by defining BROKEN_DIRENT to 1 in all cases and see how
+ it goes
+ * congzhang read dir.c again
+ <congzhang> braunr: it does not crash this time, I do more test
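+
+The point about struct dirent above: d_name is not a PATH_MAX-sized array
+(and GNU/Hurd deliberately leaves PATH_MAX undefined), so portable code
+treats each entry name as a NUL-terminated string of whatever length it has.
+A minimal example:
+
+    #include <dirent.h>
+    #include <stdio.h>
+    #include <string.h>
+
+    int
+    main (int argc, char **argv)
+    {
+      const char *path = argc > 1 ? argv[1] : ".";
+      DIR *dir = opendir (path);
+      struct dirent *d;
+
+      if (dir == NULL)
+        {
+          perror ("opendir");
+          return 1;
+        }
+
+      while ((d = readdir (dir)) != NULL)
+        /* d->d_name is simply NUL-terminated; never assume it is
+           PATH_MAX bytes long or copy it into a fixed-size buffer.  */
+        printf ("%zu %s\n", strlen (d->d_name), d->d_name);
+
+      closedir (dir);
+      return 0;
+    }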
+
+
+#### IRC, freenode, #hurd, 2013-09-04
+
+ <congzhang> hi, I am dotgnu work on hurd, and even winforms app
+ <congzhang> s/am/make
+ <congzhang> and maybe c# hello world translate another day :)
diff --git a/open_issues/clock_gettime.mdwn b/open_issues/clock_gettime.mdwn
index 98454d45..65ab52df 100644
--- a/open_issues/clock_gettime.mdwn
+++ b/open_issues/clock_gettime.mdwn
@@ -197,4 +197,14 @@ In context of [[select]].
"atomic" update of the struct with time :)
+# IRC, freenode, #hurd, 2013-09-04
+
+ <teythoon> do we have CLOCK_MONOTONIC ?
+ <braunr> teythoon: i think we do but it's actually a simple offset from
+ CLOCK_REALTIME .. :)
+ <teythoon> ah never mind, I do hate this posix time interface anyways
+ <braunr> really ?
+ <braunr> i think librt is decent
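+
+For reference, the interface being discussed (standard POSIX/librt usage;
+whether CLOCK_MONOTONIC is backed by a real monotonic clock or is merely an
+offset from CLOCK_REALTIME is exactly the implementation detail mentioned
+above):
+
+    #include <stdio.h>
+    #include <time.h>
+
+    int
+    main (void)
+    {
+      struct timespec ts;
+
+      if (clock_gettime (CLOCK_MONOTONIC, &ts) != 0)
+        {
+          perror ("clock_gettime (CLOCK_MONOTONIC)");
+          return 1;
+        }
+
+      printf ("monotonic: %ld.%09ld\n", (long) ts.tv_sec, ts.tv_nsec);
+      return 0;
+    }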
+
+
# Candidate for [[vDSO]] code?
diff --git a/open_issues/cloud.mdwn b/open_issues/cloud.mdwn
new file mode 100644
index 00000000..58ed2f5b
--- /dev/null
+++ b/open_issues/cloud.mdwn
@@ -0,0 +1,49 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+Some *cloud*y things.
+
+[[!toc]]
+
+
+# [[!wikipedia OpenStack]]
+
+## IRC, freenode, #hurd, 2013-09-21
+
+ <jproulx> Hmmm, was hoping to run hurd on my kvm based openstack cloud, but
+ no virtio.
+ <jproulx> I see "Write virtio drivers for KVM. Ideally they would be
+ userland" is listed as a "small hack", as a sysadmin rather than an OS
+ hacker it doesn't sound small to me, but if there's some standard
+ documentation on porting drivers I could take a run at it.
+ <youpi> well, perhaps "small" is not the proper word
+ <youpi> compared to e.g. revamping disk i/o :)
+ <youpi> it's not something one can achieve in e.g. 1h, for instance
+ <youpi> it's not something straightforward either, one has to get
+ documentation about virtio (I don't know what exists), and get
+ documentation about the mach device interface (that's in the gnumach
+ manual, the devnode translator can be used as a skeleton)
+ <youpi> jproulx: openstack imposes the use of virtio drivers? that's odd
+ <jproulx> that's more like I'd expect. I there's enough search terms in
+ your response for me to see what's really involved
+ <jproulx> youpi it doesn't impose that but it is how mine is configured the
+ other thousand VMs are happier that way.
+ <jproulx> I can look at that side too and see if I need to have everything
+ use the same device settings or if I can control it per instance
+ <jproulx> A bit of a non-sequitur at this point but just in case someone
+ searches the transcripts and sees my questions about hurd on openstack,
+ yes it is possible to specify non-virtio devices per image, here's the
+ commandline to load sthibault's qemu image into openstack with devices
+ that work:
+ <jproulx> glance image-create --property hw_disk_bus=ide --property
+ hw_cdrom_bus=ide --property hw_vif_model=rtl8139 --disk-format raw
+ --container-format bare --name gnu-hurd --copy-from
+ http://people.debian.org/~sthibault/hurd-i386/debian-hurd.img
+ <youpi> jproulx: thanks, I've pushed it on the wiki
diff --git a/open_issues/code_analysis.mdwn b/open_issues/code_analysis.mdwn
index bdd2ae18..67798c6a 100644
--- a/open_issues/code_analysis.mdwn
+++ b/open_issues/code_analysis.mdwn
@@ -193,3 +193,17 @@ There is a [[!FF_project 276]][[!tag bounty]] on some of these tasks.
* [Trinity: A Linux kernel fuzz tester (and then
some)](http://www.socallinuxexpo.org/scale11x/presentations/trinity-linux-kernel-fuzz-tester-and-then-some),
Dave Jones, The Eleventh Annual Southern California Linux Expo, 2013.
+
+ * Mayhem, *an automatic bug finding system*
+
+ IRC, freenode, #hurd, 2013-06-29:
+
+ <teythoon> started reading the mayhem paper referenced here
+ http://lists.debian.org/debian-devel/2013/06/msg00720.html
+ <teythoon> that's nice work, they are doing symbolic execution of x86
+ binary code, that's effectively model checking with some specialized
+ formulas
+ <teythoon> (too bad the mayhem code isn't available, damn those
+ academic people keeping the good stuff to themselvs...)
+ <teythoon> (and I really think that's bad practice, how should anyone
+ reproduce their results? that's not how science works imho...)
diff --git a/open_issues/crash_server.mdwn b/open_issues/crash_server.mdwn
index 7ed4afbf..5182df6f 100644
--- a/open_issues/crash_server.mdwn
+++ b/open_issues/crash_server.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2009, 2010, 2011 Free Software Foundation,
+[[!meta copyright="Copyright © 2009, 2010, 2011, 2013 Free Software Foundation,
Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
@@ -189,6 +189,65 @@ one...
mach_msg_trap
/home/tschwinge/tmp/gnumach/gnumach-1-branch-Xen-branch.build/../gnumach-1-branch-Xen-branch/ipc/mach_msg.c:1367
+
+# IRC, freenode, #hurd, 2013-09-07
+
+ <rekado> I'm trying to investigate a crash in pfinet, so it will actually
+ die. I just want to know why it dies and what the value of a few
+ variables has been when it died.
+ <teythoon> have you tried to make it dump core?
+ <rekado> oh, good idea.
+ <rekado> I'll try that.
+ <teythoon> do you know how?
+ <rekado> I don't, but I think I can figure it out.
+ <teythoon> look into /servers
+ <rekado> do I just have to set CRASHSERVER=/servers/crash-dump-core and run
+ pfinet in that environment?
+ <teythoon> possibly, I've never heard of CRASHSERVER, but it's certainly
+ plausible ;)
+ <teythoon> I just link crash to crash-dump-core, that way it is permanent
+ and for all processes
+ <rekado> found it in the website contents
+ <rekado> gotta try that.
+ <rekado> hmm, I can't get pfinet to dump core; linked /servers/crash to
+ /servers/crash-dump-core and compiled pfinet to raise(6) at one point.
+ <rekado> But no core file is created.
+ <teythoon> :/
+ <teythoon> rekado: try cd /tmp ; cat & kill -SIGILL %% to see if that dumps
+ core
+ <rekado> yes, this works.
+ <rekado> I replaced the original pfinet with my crashing version.
+ <rekado> Should it dump core to /hurd then?
+    <teythoon> I'm not sure about its wd
+ <teythoon> hm, ok, I just did settrans -ca foo /hurd/pfinet and then killed
+ that pfient with SIGILL and it dumped core
+ <teythoon> to the directory I issued the settrans from
+ <rekado> So I must run it myself. I can't just replace the original binary
+ and have it dump core somewhere.
+ <teythoon> it seems that you have to use settrans -ca to start an active
+ translator
+ <teythoon> do fsysopts /servers/socket/2 to find out the cmdline of your
+ pfinet
+ <rekado> that's very helpful.
+ <rekado> thanks
+ <teythoon> then use this to restart it, e.g.:
+ <teythoon> settrans -afg /servers/socket/2 $(fsysopts /servers/socket/2)
+    <teythoon> if it dies it should dump core to your cwd
+ <rekado> great. Thank you very much. I had been wondering how to get the
+ full cmdline of pfinet.
+ * rekado makes a note of fsysopts
+ <rekado> yup, there's the core file. Nice.
+ <teythoon> cool 8D
+ <teythoon> btw, in case using gdb doesn't work out for your problem, if you
+ start pfinet (or any translator) this way (with -a == active), you can
+ write stuff to stderr
+ <rekado> yeah, I noticed that. The assert() call wrote to stderr. Useful.
+ <braunr> rekado: core dumps are another not-working-well feature of the
+ hurd :/
+ <braunr> i recommend attaching
+ <tschwinge> rekado: In case that's still helpful:
+ <http://www.gnu.org/software/hurd/hurd/debugging/translator.html>.
+
---
If someone is working in this area, they may want to have a look at
diff --git a/open_issues/dbus.mdwn b/open_issues/dbus.mdwn
index 2f02579e..a41515a1 100644
--- a/open_issues/dbus.mdwn
+++ b/open_issues/dbus.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2011, 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2012, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -14,15 +15,17 @@ The dbus problems are due to missing scm credentials [[sendmsg_scm_creds]] and s
[[pflocal_socket_credentials_for_local_sockets]]. There was also a problem with short timeout in
[[select]], but that has been solved in Debian by setting a minimum timeout of 1ms.
----
+[[!toc]]
-IRC, freenode, #hurd, 2011-11-26:
+
+# IRC, freenode, #hurd, 2011-11-26
<antrik> BTW, how much effort is necessary to fix dbus?
<pinotree> basically, have pflocal know who's the sender
(pid/uid/gid/groups) in the socket send op
-IRC, freenode, #hurd, 2011-12-16:
+
+# IRC, freenode, #hurd, 2011-12-16
<braunr> pinotree: what's the problem with dbus ?
<pinotree> braunr: select() returning 0 changed fd's with very short (eg <
@@ -53,7 +56,8 @@ IRC, freenode, #hurd, 2011-12-16:
<braunr> hm i agree with neal, i don't understand why the timeout is given
to the kernel as part of the mach_msg call
-IRC, freenode, #hurd, 2011-12-20:
+
+# IRC, freenode, #hurd, 2011-12-20
<braunr> hm, i don't see any occurrence of SCM_CREDENTIALS in dbus
<braunr> only SCM_RIGHTS
@@ -88,3 +92,164 @@ IRC, freenode, #hurd, 2011-12-20:
<pinotree> iirc roland didn't like one or more parts of it (but i could be
wrong)
<braunr> ok
+
+
+# IRC, freenode, #hurd, 2013-07-17
+
+ <teythoon> btw pinotree, what happened to your efforts to make dbus work?
+ <pinotree> not much, my initial patch was just a crude hack, a better
+ solution requires more thinkering and work
+ <teythoon> yes, ive seen that
+ <teythoon> but that was only a tiny patch against the libc, surely there
+ must be more to that?
+ <pinotree> not really
+ <teythoon> and the proper fix is to patch pflocal to query the auth server
+ and add the credentials?
+ <pinotree> possibly
+ <teythoon> that doesn't sound to bad, did you give it a try?
+ <pinotree> not really, got caught in other stuff
+
+
+# IRC, freenode, #hurd, 2013-09-02
+
+ <gnu_srs1> something is wrong with libc0.3 since the switch to 2.17. dbus
+ does not run any longer when rebuilt
+ <gnu_srs1> the latest build of dbus was with 2.13: libc0.3-dev: already
+ installed (2.13-38)
+ <pinotree> debug it
+ <gnu_srs1> Yes, I will. Maybe somebody could rebuild it and verify my
+ findings?
+ <pinotree> gnu_srs1: your finding is "doesn't work", which is generic and
+ does not help without investigation
+ <gnu_srs1> just rebuild it and: e.g. ./build-debug/bus/dbus-daemon --system
+ (--nofork)
+ <pinotree> gnu_srs1: please, debug it
+ <gnu_srs1> I have partially already. But maybe the problems only shows on
+ my box. I'll rebuild on another box before continuing debugging.
+ <pinotree> gnu_srs1: are you, by chance, running a libc or something else
+ with your scm_creds work?
+ <gnu_srs1> I did, but I've backed to 2.17-92 right now.
+    <gnu_srs1> same problem with dbus on another box, something's fishy:-(
+ <gnu_srs1> braunr: any good way to find out if the dbus problems are with
+ libpthread? Setting a breakpoint with libc0.3-dbg installed.
+ <braunr> gnu_srs1: i don't know
+
+See [[glibc]], *Missing interfaces, amongst many more*, *`SOCK_CLOEXEC`*.
+
+
+# IRC, freenode, #hurd, 2013-09-04
+
+ <gnu_srs> Hi, looks like dbus requires abstract socket namespace: #undef
+ HAVE_ABSTRACT_SOCKETS What's missing?
+ <pinotree> uh?
+ <pinotree> abstract unix sockets are a Linux feature, and surely it is not
+ mandatory for dbus
+ <gnu_srs> Looks like dbus exits if they are not supported:
+ <gnu_srs> dbus_set_error (error, DBUS_ERROR_NOT_SUPPORTED, "Operating
+ system does not support abstract socket namespace\n");   _dbus_close
+ (listen_fd, NULL); 1061  return -1;
+ <pinotree> that is enclosed in a if (abstract)
+ <pinotree> and that parameter is set to true in other places (eg
+ dbus/dbus-server-unix.c) only when HAVE_ABSTRACT_SOCKETS is defined
+ <pinotree> so no, abstract sockets are not mandatory
+ <gnu_srs> Well this code is executed e.g. when running emacs remotely in
+ X. Have to dig deeper then to see why.
+ <pinotree> maybe it could have to do the fact that your dbus server is
+ running in linux and runs by default using such sockets type
+ <pinotree> but yes, you need to dig better
+ <gnu_srs> pinotree: You are right. when running natively the problem is:
+ <pinotree> *drums*
+ <gnu_srs> Manually: Process /usr/lib/at-spi2-core/at-spi-bus-launcher
+ exited with status 1
+ <pinotree> eh?
+ <gnu_srs> Error retrieving accessibility bus address:
+ org.freedesktop.DBus.Error.Spawn.ChildExited: ^
+ <pinotree> most probably that service does not start due to the lack of
+ socket credentials which affects dbus
+ <pinotree> uninstall or disable those additional services, they are not
+ your problem
+ <gnu_srs> credentials is enabled. which services to remove?
+ <pinotree> dunno
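+
+As pinotree notes above, the abstract socket namespace is a Linux-only
+extension: the address is marked by a leading NUL byte in `sun_path` and
+never appears in the file system, whereas portable code (and therefore
+dbus on the Hurd) has to fall back to path-based `AF_UNIX` addresses.
+The following is only a small illustrative sketch of the two address
+forms; it is not code taken from dbus:
+
+    #include <stddef.h>
+    #include <string.h>
+    #include <sys/socket.h>
+    #include <sys/un.h>
+
+    /* Portable, path-based AF_UNIX address: what dbus has to use on
+       systems without the Linux-only abstract namespace.  */
+    static socklen_t
+    fill_path_addr (struct sockaddr_un *sun, const char *path)
+    {
+      memset (sun, 0, sizeof *sun);
+      sun->sun_family = AF_UNIX;
+      strncpy (sun->sun_path, path, sizeof sun->sun_path - 1);
+      return sizeof (struct sockaddr_un);
+    }
+
+    /* Linux abstract namespace: sun_path starts with a NUL byte and the
+       name never shows up in the file system.  */
+    static socklen_t
+    fill_abstract_addr (struct sockaddr_un *sun, const char *name)
+    {
+      size_t len = strlen (name);   /* name assumed to fit in sun_path */
+      memset (sun, 0, sizeof *sun);
+      sun->sun_family = AF_UNIX;
+      memcpy (sun->sun_path + 1, name, len);
+      return offsetof (struct sockaddr_un, sun_path) + 1 + len;
+    }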
+
+
+# IRC, freenode, #hurd, 2013-09-11
+
+ <gnu_srs> Hi, looks like frebsd had (2008) the same problem as hurd when
+ sending credentials over PF_INET:
+ <gnu_srs>
+ http://lists.freebsd.org/pipermail/freebsd-hackers/2008-May/024577.html
+ <gnu_srs> Since the dbus code is about the same now (2013), maybe they
+ added support?
+ <gnu_srs> The next message in the thread confirms that the dbus code is
+ invalid, does anybody have pointers?
+ <pinotree> from what i've seen so far, socket credentials are done only for
+ local sockets (ie PF_UNIX)
+ <pinotree> i don't see how things like uid/gid/pid of the socket endpoint
+ can have anything to do with AF_INET
+ <pinotree> and socket credentials in dbus are used only in the [local]
+ socket transport, so there's no issue
+
+
+# IRC, freenode, #hurd, 2013-09-12
+
+ <gnu_srs> pinotree: Yes, there is an issue with dbus and AF_INET, see
+ test/corrupt.c: tests /corrupt/tcp and /corrupt/byte-order/tcp:-/
+ <pinotree> gnu_srs: what's wrong with those? they are just testing the
+ connection over a tcp socket
+ <pinotree> as said above, socket credentials shouldn't be used in such
+ cases
+ <gnu_srs> They are, see also test/relay.c: /relay and /limit tests:-(
+ <pinotree> how are they?
+ <pinotree> please be more specifc...
+ <gnu_srs> Just run the tests yourself with DBUS_VERBOSE=1
+ <pinotree> you are claiming there is a problem, so please specify what is
+ the actual issue
+ <gnu_srs> DBUS_VERBOSE=1 build-debug/test/test-relay
+ <pinotree> you are claiming there is a problem, so please specify what is
+ the actual issue
+ <gnu_srs> same with test-corrupt
+ <gnu_srs> look at the verbose output: Failed to write credentials: Failed
+ to write credentials byte: Invalid argument
+ <gnu_srs> coming from pfinet since PF_INET is used.
+ <pinotree> check what it does on linux then
+ <pinotree> put an abort() at the start of the read/write socket credential
+ functions in dbus-sysdeps-unix.c and see whether it is triggered also on
+ linux
+ <gnu_srs> SO_PEERCRED is used for linux and LOCAL_CREDS is used for
+ kfreebsd, so we are on our own here:-/
+ <pinotree> and linux' SO_PEERCRED works also on AF_INET sockets? i'd doubt
+ it
+ <gnu_srs>
+ http://stackoverflow.com/questions/10037086/so-peercred-vs-scm-credentials-why-there-are-both-of-them
+ <pinotree> yes, i know the difference, but please read what i asked again
+ <gnu_srs> I'll check to be sure...
+ <braunr> gnu_srs: user credentials are not supposed to be passed through an
+ AF_INET socket
+ <braunr> how hard is that to understand ?
+ <gnu_srs> OK, linux use send since CMSGCREDS is not defined to write
+ credentials. Working on how they are received.
+ <gnu_srs> braunr: I do understand, but the dbus code tries to do that for
+ Hurd:-(
+ <pinotree> then it should do that on linux too
+ <pinotree> (since the local socket credentials code is isolated in own
+ functions, and they are called only for the unix transport)
+ <gnu_srs> Happiness:-D, almost all dbus tests pass!
+ <gnu_srs> 17(17) dbus tests pass:)
+ <braunr> gnu_srs: hopefully your patch does things right
+ <gnu_srs> which patch
+ <braunr> adding credentials through unix socket
+ <braunr> isn't that what you're doing ?
+ <gnu_srs> the mail to MLs is from the stock installed packages.
+ <braunr> ?
+ <gnu_srs> the test reports are with the SCM_CREDS patches, but I stumbled
+ on the SCM_RIGHTS issues reported to MLs
+ <gnu_srs> no patches applied, just test the attached file yourself.
+ <braunr> so what's your work about ?
+ <gnu_srs> I'm working on SCM_CREDS, yes, and created patches for dbus,
+ glib2.0 and libc.
+ <gnu_srs> the mail was about some bug in the call to io_restrict_auth in
+ sendmsg.c: without any of my patches applied (another image)
+ <teythoon> gnu_srs: you have to give us more context, how are we supposed
+ to know how to find this sendmsg.c file?
+ <pinotree> (it's in glibc, but otherwise the remark is valid)
+ <pinotree> s/otherwise/anyway/
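+
+The upshot of the discussion is that dbus passes credentials only over
+its local (`AF_UNIX`) transport, via `SO_PEERCRED` on Linux and
+`LOCAL_CREDS` on kFreeBSD, and never over `AF_INET`.  For reference,
+here is a minimal sketch of the Linux-style `SCM_CREDENTIALS` ancillary
+message, which is roughly what an `SCM_CREDS` implementation on the
+Hurd (pflocal querying the auth server) would have to emulate; it is
+not taken from the dbus sources or from gnu_srs's patches:
+
+    /* Minimal sketch (Linux semantics): send one data byte together with
+       the sender's credentials as SCM_CREDENTIALS ancillary data over an
+       AF_UNIX socket.  */
+    #define _GNU_SOURCE
+    #include <string.h>
+    #include <sys/socket.h>
+    #include <sys/types.h>
+    #include <sys/uio.h>
+    #include <unistd.h>
+
+    static ssize_t
+    send_creds (int fd)
+    {
+      char byte = 0;
+      struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
+      union
+      {
+        struct cmsghdr align;
+        char buf[CMSG_SPACE (sizeof (struct ucred))];
+      } u;
+      struct msghdr msg = { 0 };
+      struct cmsghdr *cmsg;
+      struct ucred creds =
+        { .pid = getpid (), .uid = getuid (), .gid = getgid () };
+
+      msg.msg_iov = &iov;
+      msg.msg_iovlen = 1;
+      msg.msg_control = u.buf;
+      msg.msg_controllen = sizeof u.buf;
+
+      cmsg = CMSG_FIRSTHDR (&msg);
+      cmsg->cmsg_level = SOL_SOCKET;
+      cmsg->cmsg_type = SCM_CREDENTIALS;
+      cmsg->cmsg_len = CMSG_LEN (sizeof creds);
+      memcpy (CMSG_DATA (cmsg), &creds, sizeof creds);
+
+      return sendmsg (fd, &msg, 0);
+    }
+
+On Linux the receiver additionally has to enable `SO_PASSCRED`, and the
+kernel validates the values; presumably that is the kind of check a
+pflocal-based implementation would have to obtain from the auth server
+instead.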
diff --git a/open_issues/dde.mdwn b/open_issues/dde.mdwn
index 65d84886..9cb31d1c 100644
--- a/open_issues/dde.mdwn
+++ b/open_issues/dde.mdwn
@@ -512,6 +512,18 @@ After the microkernel devroom at [[community/meetings/FOSDEM_2013]].
<antrik> hm... good point
+## IRC, freenode, #hurd, 2013-09-20
+
+ <braunr> i should take some time to integrate my pcap changes into the
+ libpcap debian package at least
+ <pinotree> braunr: if upstream is active, i'd say to go there directly
+ <braunr> the problem with that approach is that netdde is still not part of
+ our upstream code
+ <pinotree> don't understand the relation
+ <braunr> i don't want to send the pcap guys code for an interface that is
+ still not considered upstream ...
+
+
# IRC, freenode, #hurd, 2012-08-14
<braunr> it's amazing how much code just gets reimplemented needlessly ...
@@ -598,3 +610,179 @@ In context of [[libpthread]].
<braunr> hm, i haven't looked but, does someone know if virtio is included
in netdde ?
<youpi> braunr: nope, there's an underlying virtio layer needed before
+
+
+## IRC, freenode, #hurd, 2013-07-24
+
+ <teythoon> btw, I'd love to see libvirt support in hurd
+ <teythoon> I tried to hack up a dde based net translator
+ <teythoon> afaics they are very much like any other pci device, so the
+ infrastructure should be there
+ <teythoon> if anything I expect the libvirt stuff to be more easily
+ portable
+ <youpi> what do you mean by "a dde based net translator" ?
+ <youpi> ah, you mean virtio support in netdde ?
+ <teythoon> yes
+ <teythoon> virtio net is present in the kernel version we use for the dde
+ drivers
+ <teythoon> so I just copied the dde driver over, but I had no luck
+ compiling it
+ <youpi> ok, but what would be the benefice over e1000 & co?
+ <teythoon> any of the dde drivers btw
+ <teythoon> youpi: less overhead
+ <youpi> e1000 is already low overhead actually
+ <youpi> there are less and less differences in strategies for driving a
+ real board, and a virtual one
+ <youpi> we are seeing shared memory request buffer, dma, etc. in real
+ boards
+ <youpi> which ends up being almost exactly what virtio does :)
+ <youpi> ahci, for instance, really looks extremely like a virtio interface
+ <youpi> (I know, it's a disk, but that's the same idea, and I do know what
+ I'm talking about here :) )
+ <teythoon> that would actually be my next wish, a virtio disk driver, and
+ virt9p ;)
+ <braunr> on the other hand, i wouldn't spend much time on a virtio disk
+ driver for now
+ <braunr> the hurd as it is can't boot on a device that isn't managed by the
+ kernel
+ <braunr> we'd need to change the boot protocol
+ <teythoon> ok, I wasn't planning to, just wanted to see if I can easily
+ hack up the virtio-net translator
+ <braunr> well, as youpi pointed, there is little benefit to that as well
+ <braunr> but if that's what you find fun, help yourself :)
+ <teythoon> I didn't know that, I assumed there was some value to the virtio
+ stuff
+ <braunr> there is
+ <braunr> but relatively to other improvements, it's low
+
+
+## IRC, freenode, #hurd, 2013-09-14
+
+ <rekado> I'm slowly beginning to understand the virtio driver framework
+ after reading Rusty's virtio paper and the Linux sources of a few virtio
+ drivers.
+ <rekado> Has anyone started working on virtio drivers yet?
+ <youpi> rekado: nobody has worked on virtio drivers, as I know of
+ <rekado> youpi: I'm still having a hard time figuring out where virtio
+ would fit in in the hurd.
+ <rekado> I'm afraid I don't understand how drivers in the hurd work at all.
+ Will part of this have to be implemented in Mach?
+ <youpi> rekado: it could be implemented either as a Mach driver, or as a
+ userland driver
+ <youpi> better try the second alternative
+ <youpi> i.e. as a translator
+ <youpi> sitting on e.g. /dev/eth0 or /dev/hd0
+
+
+## IRC, freenode, #hurd, 2013-09-18
+
+ <rekado> To get started with virtio I'd like to write a simple driver for
+ the entropy device which appears as a PCI device when running qemu with
+ -device virtio-rng-pci .
+ <braunr> why entropy ?
+ <rekado> because it's the easiest.
+ <braunr> is it ?
+ <braunr> the driver itself may be, but integrating it within the system
+ probably isn't
+ <rekado> It uses the virtio framework but only really consists of a
+ read-only buffer virtqueue
+ <braunr> you're likely to want something that can be part of an already
+ existing subsystem like networking
+ <rekado> All the driver has to do is push empty buffers onto the queue and
+ pass the data it receives back from the host device to the client
+ <rekado> The thing about existing subsystems is: I don't really understand
+ them enough.
+ <rekado> I understand virtio, though.
+ <braunr> but isn't your goal understanding at least one ?
+ <rekado> yes.
+ <braunr> then i suggest working on virtio-net
+ <braunr> and making it work in netdde
+ <rekado> But to write a virtio driver for network I must first understand
+ how to actually talk to the host virtio driver/device.
+ <braunr> rekado: why ?
+ <rekado> There is still a knowledge gap between what I know about virtio
+ and what I have learned about the Hurd/Mach.
+ <braunr> are you trying to learn about virtio or the hurd ?
+ <rekado> both, because I'd like to write virtio drivers for the hurd.
+ <braunr> hm no
+ <rekado> with virtio drivers pass buffers to queues and then notify the
+ host.
+ <braunr> you may want it, but it's not what's best for the project
+ <rekado> oh.
+ <braunr> what's best is reusing existing drivers
+ <braunr> we're much too far from having enough manpower to maintain our own
+ <rekado> you mean porting the linux virtio drivers?
+ <braunr> there already is a virtio-net driver in linux 2.6
+ <braunr> so yes, reuse it
+ <braunr> the only thing which might be worth it is a gnumach in-kernel
+ driver for virtio block devices
+ <braunr> because currently, we need our boot devices to be supported by the
+ kernel itself ...
+ <rekado> when I boot the hurd with qemu and the entropy device I see it as
+ an unknown PCI device in the output of lspci.
+ <braunr> that's just the lspci database which doesn't know it
+ <rekado> Well, does this mean that I could actually talk to the device
+ already? E.g., through libpciaccess?
+ <rekado> I'm asking because I don't understand how exactly devices "appear"
+ on the Hurd.
+ <braunr> it's one of the most difficult topic currently
+ <braunr> you probably can talk to the device, yes
+ <braunr> but there are issues with pci arbitration
+ * rekado takes notes: "pci arbitration"
+ <rekado> so, this is about coordinating bus access, right?
+ <braunr> yes
+ <braunr> i'm not a pci expert so i can't tell you much more
+ <rekado> heh, okay.
+ <rekado> what kind of "issues with pci arbitration" are you referring to,
+ though?
+ <rekado> Is this due to something that Mach isn't doing?
+ <braunr> ideally, mach doesn't know about pci
+ <braunr> the fact we still need in-kernel drivers for pci devices is a big
+ problem
+ <braunr> we may need something like a pci server in userspace
+ <braunr> on l4 system it's called an io server
+ <rekado> How do in-kernel drivers avoid these issues?
+ <braunr> they don't
+ <rekado> Or rather: why is it they don't have these issues?
+ <braunr> they do
+ <rekado> oh.
+ <braunr> we had it when youpi added the sata driver
+ <braunr> so currently, all drivers need to avoid sharing common interrupts
+ for example
+ <braunr> again, since i'm not an expert about pci, i don't know more about
+ the details
+ <Hooligan0> pci arbitrations are made by hardware ... no ?
+ <braunr> Hooligan0: i don't know
+ <braunr> i'm not merely talking about bus mastering here
+ <braunr> simply preventing drivers from mapping the same physical memory
+ should be enforced somewhere
+ <braunr> i'm not sure it is
+ <braunr> same for irq sharing
+ <Hooligan0> braunr : is the support for boot devices into the kernel is
+ really needed if a loader put servers into the memory before starting
+ mach ?
+ <braunr> Hooligan0: there is a chicken-and-egg problem during boot,
+ whatever the solution
+ <braunr> obviously, we can preload from memory, but then you really want
+ your root file system to use a disk
+ <braunr> Hooligan0: the problem with preloading from memory is that you
+ want the root file system to use a real device
+ <braunr> the same way / refers to one on unix
+ <braunr> so you have an actual, persistent hierarchy from which the system
+ can be initialized and translators started
+ <braunr> you also want to share as much as possible between the early
+ programs and the others
+ <braunr> so for example, both the disk driver and the root file system
+ should be able to use the same libc instance
+ <braunr> this requires a "switch root" mechanism that needs to be well
+ defined and robust
+ <braunr> otherwise we'd just build our drivers and root fs statically
+ <braunr> (which is currently done with rootfs actually)
+ <braunr> and this isn't something we're comfortable with
+ <braunr> so for now, in-kernel drivers
+ <Hooligan0> humm ... disk driver and libc ... i see
+ <Hooligan0> in other way ... disk drivers can use only a little number of
+ lib* functions ; so with a static version, a bit of memory is lots
+ <Hooligan0> s/lots/lost
+ <Hooligan0> and maybe the driver can be hot-replaced after boot (ok ok,
+ it's more simple to say than to write)
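+
+To make rekado's description above ("push empty buffers onto the queue
+and pass the data it receives back") a bit more concrete, here is a
+heavily simplified sketch of the split-virtqueue structures from the
+virtio specification and of posting one device-writable buffer.  It
+omits barriers, notification and the used ring, and is not a working
+driver:
+
+    #include <stdint.h>
+
+    #define VIRTQ_DESC_F_WRITE  2   /* buffer is written by the device */
+
+    struct virtq_desc
+    {
+      uint64_t addr;    /* guest-physical address of the buffer */
+      uint32_t len;
+      uint16_t flags;
+      uint16_t next;
+    };
+
+    struct virtq_avail
+    {
+      uint16_t flags;
+      uint16_t idx;
+      uint16_t ring[];  /* descriptor indices offered to the device */
+    };
+
+    /* Offer one device-writable buffer (e.g. for virtio-rng) to the
+       device.  */
+    static void
+    post_buffer (struct virtq_desc *desc, struct virtq_avail *avail,
+                 uint16_t queue_size, uint16_t desc_idx,
+                 uint64_t buf_phys, uint32_t buf_len)
+    {
+      desc[desc_idx].addr  = buf_phys;
+      desc[desc_idx].len   = buf_len;
+      desc[desc_idx].flags = VIRTQ_DESC_F_WRITE;
+      desc[desc_idx].next  = 0;
+
+      avail->ring[avail->idx % queue_size] = desc_idx;
+      avail->idx++;   /* a real driver needs a write barrier here */
+      /* ... then notify the device and later collect the filled buffer
+         from the used ring.  */
+    }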
diff --git a/open_issues/device_drivers_and_io_systems.mdwn b/open_issues/device_drivers_and_io_systems.mdwn
index 5bda0213..085a737a 100644
--- a/open_issues/device_drivers_and_io_systems.mdwn
+++ b/open_issues/device_drivers_and_io_systems.mdwn
@@ -92,3 +92,9 @@ Also see [[user-space device drivers]].
* OSF Mach
* Darwin
+
+ * IRC, freenode, #hurd, 2013-08-26
+
+ < stargater> in haiku is a layer wraper for bsd driver
+ < stargater>
+ https://www.haiku-os.org/news/2007-05-08/haiku_getting_a_freebsd_network_driver_compatibility_layer
diff --git a/open_issues/exec.mdwn b/open_issues/exec.mdwn
index 36513453..05deaa7a 100644
--- a/open_issues/exec.mdwn
+++ b/open_issues/exec.mdwn
@@ -10,7 +10,10 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_hurd]]
-IRC, unknown channel, unknown date.
+[[!toc]]
+
+
+# IRC, unknown channel, unknown date.
<youpi> oh my, disabling gzip/bzip2 support makes apt preconfigure hang
<youpi> support in exec* I meant
@@ -30,6 +33,50 @@ Justus: This doesn't seem to be an issue anymore (2013-09-08):
Also I've never encountered a problem with apt.
+
+## IRC, freenode, #hurd, 2013-08-01
+
+ <teythoon> uh, all the non trivial exec server code has #ifdef'd BFD code
+ all over it and it looks like that isn't even used anymore
+ <teythoon> that's too bad actually, I figured out how to get the values
+ from BFD, not so for the other elf parser that is used instead
+
+
+## IRC, freenode, #hurd, 2013-08-05
+
+ <teythoon> btw, there is a Debian bug concerning zipped executables. now
+ I'm not sure if I understood the problem, but gziped and bzip2ed
+ executables work for me
+ <teythoon> (not that I'm a big fan of that particular feature)
+ <youpi> iirc these somehow got fixed yes
+ <youpi> something like a previous out of bound access
+ <teythoon> the exec server contains lot's of code that is unused and
+ probably bit rot (#ifdef BFD) or otherwise ignored (#if 0)
+ <youpi> yes :/
+ <teythoon> and there's gunzipping and bunzip2ing, which we probably don't
+ want anyway
+ <pinotree> why not?
+ <teythoon> we should strip all that from exec and start adding features
+ <teythoon> pinotree: b/c it's slow and the gain is questionable
+ <teythoon> it breaks mmapping the code in
+ <teythoon> exec/exec.c is huge (~2300 lines) and complex and it is an
+ essential server
+ <teythoon> and I wonder if the unzipping is done securely, e. g. if it's
+ not possible to crash exec with an maliciously compressed executable
+
+
+## IRC, freenode, #hurd, 2013-09-12
+
+ <rekado> The zip code in hurd/exec/ looks really complicated; does it
+ really just unpack zipped files in memory (which could be replaced by
+ library calls) or is there something else going on?
+ <braunr> rekado:
+ http://lists.gnu.org/archive/html/bug-hurd/2013-08/msg00049.html
+ <rekado> braunr: interesting. Thanks.
+ <rekado> Does this mean that the "small hack entry" on the contributing
+ page to use libz and libbz2 in exec is no longer valid?
+ <braunr> probably
+
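+If the decompression support were kept, the hand-rolled unzip code
+could indeed be replaced by library calls.  A minimal sketch of what
+in-memory gzip inflation with zlib looks like (error handling
+abbreviated; this is not the actual exec code, and `gunzip_image` is a
+made-up name):
+
+    #include <stdlib.h>
+    #include <string.h>
+    #include <zlib.h>
+
+    /* Inflate a gzip'ed image that is already in memory, growing the
+       output buffer as needed.  */
+    static void *
+    gunzip_image (const void *in, size_t in_len, size_t *out_len)
+    {
+      z_stream z;
+      size_t cap = 2 * in_len + 1024;
+      void *out = malloc (cap);
+      int ret;
+
+      if (out == NULL)
+        return NULL;
+      memset (&z, 0, sizeof z);
+      z.next_in = (Bytef *) in;
+      z.avail_in = in_len;
+      /* 16 + MAX_WBITS selects gzip (rather than raw or zlib) framing.  */
+      if (inflateInit2 (&z, 16 + MAX_WBITS) != Z_OK)
+        {
+          free (out);
+          return NULL;
+        }
+
+      do
+        {
+          if (z.total_out == cap)
+            out = realloc (out, cap *= 2);  /* failure ignored in sketch */
+          z.next_out = (Bytef *) out + z.total_out;
+          z.avail_out = cap - z.total_out;
+          ret = inflate (&z, Z_NO_FLUSH);
+        }
+      while (ret == Z_OK);
+
+      inflateEnd (&z);
+      if (ret != Z_STREAM_END)
+        {
+          free (out);
+          return NULL;
+        }
+      *out_len = z.total_out;
+      return out;
+    }
+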
---
May want to have a look at using BFD / libiberty/simpleobject.
diff --git a/open_issues/exec_leak.mdwn b/open_issues/exec_leak.mdwn
deleted file mode 100644
index b58d2c81..00000000
--- a/open_issues/exec_leak.mdwn
+++ /dev/null
@@ -1,57 +0,0 @@
-[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]]
-
-[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
-id="license" text="Permission is granted to copy, distribute and/or modify this
-document under the terms of the GNU Free Documentation License, Version 1.2 or
-any later version published by the Free Software Foundation; with no Invariant
-Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
-is included in the section entitled [[GNU Free Documentation
-License|/fdl]]."]]"""]]
-
-[[!tag open_issue_hurd]]
-
-
-# IRC, freenode, #hurd, 2012-08-11
-
- <braunr> the exec servers seems to leak a lot
- <braunr> server*
- <braunr> exec now uses 109M on darnassus
- <braunr> it really leaks a lot
- <pinotree> only 109mb? few months ago, exec on exodar was taking more than
- 200mb after few days of uptime with builds done
- <braunr> i wonder how much it takes on the buildds
-
-
-# IRC, freenode, #hurd, 2012-08-17
-
- <braunr> the exec leak is tricky
- <braunr> bddebian: btw, look at the TODO file in the hurd source code
- <braunr> bddebian: there is a not from thomas bushnell about that
- <braunr> "*** Handle dead name notifications on execserver ports. !
- <braunr> not sure it's still a todo item, but it might be worth checking
- <bddebian> braunr: diskfs_execboot_class = ports_create_class (0, 0);
- This is what would need to change right? It should call some cleanup
- routine in the first argument?
- <bddebian> Would be ideal if it could just use deadboot() from exec.
- <braunr> bddebian: possible
- <braunr> bddebian: hum execboot, i'm not so sure
- <bddebian> Execboot is the exec task, no?
- <braunr> i don't know what execboot is
- <bddebian> It's from libdiskfs
- <braunr> but "diskfs_execboot_class" looks like a class of ports used at
- startup only
- <braunr> ah
- <braunr> then it's something run in the diskfs users ?
- <bddebian> yes
- <braunr> the leak is in exec
- <braunr> if clients misbehave, it shouldn't affect that server
- <bddebian> That's a different issue, this was about the TODO thing
- <braunr> ah
- <braunr> i don't know
- <bddebian> Me either :)
- <bddebian> For the leak I'm still focusing on do-bunzip2 but I am baffled
- at my results..
- <braunr> ?
- <bddebian> Where my counters are zero if I always increment on different
- vars but wild freaking numbers if I increment on malloc and decrement on
- free
diff --git a/open_issues/exec_memory_leaks.mdwn b/open_issues/exec_memory_leaks.mdwn
index d504c4f0..67281bdc 100644
--- a/open_issues/exec_memory_leaks.mdwn
+++ b/open_issues/exec_memory_leaks.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -12,8 +12,56 @@ License|/fdl]]."]]"""]]
There are some memory leaks in [[`exec`|hurd/translator/exec]].
+[[!toc]]
-# I
+
+# IRC, freenode, #hurd, 2012-08-11
+
+ <braunr> the exec servers seems to leak a lot
+ <braunr> server*
+ <braunr> exec now uses 109M on darnassus
+ <braunr> it really leaks a lot
+ <pinotree> only 109mb? few months ago, exec on exodar was taking more than
+ 200mb after few days of uptime with builds done
+ <braunr> i wonder how much it takes on the buildds
+
+
+## IRC, freenode, #hurd, 2012-08-17
+
+ <braunr> the exec leak is tricky
+ <braunr> bddebian: btw, look at the TODO file in the hurd source code
+    <braunr> bddebian: there is a note from thomas bushnell about that
+ <braunr> "*** Handle dead name notifications on execserver ports. !
+ <braunr> not sure it's still a todo item, but it might be worth checking
+ <bddebian> braunr: diskfs_execboot_class = ports_create_class (0, 0);
+ This is what would need to change right? It should call some cleanup
+ routine in the first argument?
+ <bddebian> Would be ideal if it could just use deadboot() from exec.
+ <braunr> bddebian: possible
+ <braunr> bddebian: hum execboot, i'm not so sure
+ <bddebian> Execboot is the exec task, no?
+ <braunr> i don't know what execboot is
+ <bddebian> It's from libdiskfs
+ <braunr> but "diskfs_execboot_class" looks like a class of ports used at
+ startup only
+ <braunr> ah
+ <braunr> then it's something run in the diskfs users ?
+ <bddebian> yes
+ <braunr> the leak is in exec
+ <braunr> if clients misbehave, it shouldn't affect that server
+ <bddebian> That's a different issue, this was about the TODO thing
+ <braunr> ah
+ <braunr> i don't know
+ <bddebian> Me either :)
+ <bddebian> For the leak I'm still focusing on do-bunzip2 but I am baffled
+ at my results..
+ <braunr> ?
+ <bddebian> Where my counters are zero if I always increment on different
+ vars but wild freaking numbers if I increment on malloc and decrement on
+ free
+
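+For reference, libports already provides the hook the TODO item seems
+to ask for: a clean routine can be attached to a port class and is run
+when a port of that class is destroyed.  A minimal sketch follows; the
+`client_state`/`release_client_state` names are made up, and whether
+this is the right place to fix the exec leak is exactly what the
+discussion above leaves open:
+
+    #include <hurd/ports.h>
+
+    struct client_state
+    {
+      struct port_info pi;      /* must come first for libports */
+      /* ... per-client bookkeeping that currently leaks ... */
+    };
+
+    /* Called by libports when a port of the class is destroyed.  */
+    static void
+    release_client_state (void *port)
+    {
+      struct client_state *cs = port;
+      /* free whatever was allocated for this client */
+      (void) cs;
+    }
+
+    static struct port_class *execserver_class;
+
+    static void
+    init_classes (void)
+    {
+      /* Instead of ports_create_class (0, 0), register the clean routine.  */
+      execserver_class = ports_create_class (release_client_state, NULL);
+    }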
+
+# 2012-11-25
After twelve hours worth of `fork/exec` ([[GCC]]'s `check-c` part of the
testsuite), we got:
@@ -29,7 +77,7 @@ quite noticeable. In comparison:
276 0 3 1 1 344 442M 28.2M 0.6 48:09.36 91min /hurd/ext2fs /dev/hd2s5
-# II
+# 2012-12-20
After running the libtool testsuite for some time:
diff --git a/open_issues/fakeroot_eagain.mdwn b/open_issues/fakeroot_eagain.mdwn
index 6b684a04..168ddf7d 100644
--- a/open_issues/fakeroot_eagain.mdwn
+++ b/open_issues/fakeroot_eagain.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -132,7 +132,7 @@ License|/fdl]]."]]"""]]
<braunr> or rather, a lot more
<braunr> (or maybe not, since it leaks only in some cases)
-[[exec_leak]].
+[[exec_memory_leaks]].
<braunr> pinotree: actually, the behaviour under linux is the same with the
alternative correctly set, whereas faked-tcp is restarted (if used at
diff --git a/open_issues/gcc.mdwn b/open_issues/gcc.mdwn
index 3aa7b63b..d8a8cd5f 100644
--- a/open_issues/gcc.mdwn
+++ b/open_issues/gcc.mdwn
@@ -37,8 +37,8 @@ git diff --patience --stat=$COLUMNS,$COLUMNS --patch --src-prefix=./ --dst-prefi
-->
-Last reviewed up to the [[Git mirror's 0479dc77cf50ee78769b55563051cf72d39b3d60
-(2013-05-27) sources|source_repositories/gcc]].
+Last reviewed up to the [[Git mirror's 3a930d3fc68785662f5f3f4af02474cb21a62056
+(2013-06-06) sources|source_repositories/gcc]].
<http://gcc.gnu.org/install/configure.html> has documentation for the
`configure` switches.
@@ -48,20 +48,247 @@ Last reviewed up to the [[Git mirror's 0479dc77cf50ee78769b55563051cf72d39b3d60
* `configure.ac`
- * `libgomp/configure.tgt`
+ * `libstdc++-v3`
- * `libstdc++-v3/configure.host`
+ * `configure.host`
- `abi_baseline_pair` etc. setting.
+ `abi_baseline_pair` etc. setting. `config/abi/post/*-linux-gnu`.
+ TODO.
- * `libstdc++-v3/config/os/gnu-linux/*`
+ * `config/os/gnu-linux`
- Is used for all GNU systems, as per `libstdc++-v3/configure.host`.
- Should rename to `gnu-user` to reflect this?
+ Is used for all GNU systems, as per `configure.host`. Should
+ rename to `gnu-user` to reflect this? TODO.
* `gcc/acinclude.m4`:`gcc_GAS_FLAGS`: always pass `--32` to assembler for
x86 Linux. (Why?)
+ * `lib-prefix.m4` (present twice in GCC sources) contains one remaining
+ `linux`-only case.
+
+ * `libjava`
+
+ TODO:
+
+ classpath/include/jni_md-x86-linux-gnu.h
+
+ See below (`log_build`).
+
+ Makefile.am:## _GNU_SOURCE defined for some Linux builds. It doesn't hurt to
+ Makefile.am:## always define it. Some systems, including Linux, need
+ Makefile.am:# certain linuxthread functions get linked:
+ Makefile.am:## This is specific to Linux/{Free,Net,Open}BSD/Hurd and perhaps few others.
+ Makefile.am: $(mkinstalldirs) $(DESTDIR)$(SDK_INCLUDE_DIR)/linux; \
+ Makefile.am: $(DESTDIR)$(SDK_INCLUDE_DIR)/linux); \
+ Makefile.am: $(DESTDIR)$(SDK_INCLUDE_DIR)/linux/$$headername.h; \
+ classpath/NEWS: the epoll notification mechanism on Linux 2.6.
+ classpath/config.rpath: linux* | k*bsd*-gnu)
+ classpath/config.rpath: gnu* | linux* | k*bsd*-gnu)
+ classpath/config.rpath: linux*oldld* | linux*aout* | linux*coff*)
+ classpath/config.rpath: linux* | k*bsd*-gnu)
+ classpath/configure.ac: *linux*)
+ classpath/configure.ac: target_os=linux-gnu
+ classpath/configure.ac: AC_MSG_WARN(no, using x86-linux-gnu)
+ classpath/doc/cp-vmintegration.texinfo:has been primarily tested against Linux and lacks garbage collections, a
+ classpath/doc/cp-vmintegration.texinfo:Linux and Windows 2000. As of June, 2004, it does not appear that ORP
+ classpath/doc/cp-vmintegration.texinfo:This is a free Java Virtual Machine that is being developed on GNU/Linux
+ classpath/doc/cp-vmintegration.texinfo:Runs on the x86 and PowerPC architectures, on the AIX, Linux, and Mac
+ classpath/gnu/classpath/SystemProperties.java: && "Linux".equals(defaultProperties.get("os.name")))
+ classpath/gnu/java/nio/EpollSelectorImpl.java: * notification mechanism on GNU/Linux.
+ classpath/java/io/File.java: * <strong>Implementation note</strong>: Unlike the RI, on Linux and UNIX
+ classpath/java/net/MimeTypeMapper.java: // On Linux this usually means /etc/mime.types.
+ classpath/ltcf-cxx.sh: linux*)
+ classpath/ltcf-cxx.sh: linux*)
+ classpath/ltconfig:# Transform linux* to *-*-linux-gnu*, to support old configure scripts.
+ classpath/ltconfig:linux-gnu*) ;;
+ classpath/ltconfig:linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'`
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig:# No shared lib support for Linux oldld, aout, or coff.
+ classpath/ltconfig:linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*)
+ classpath/ltconfig:# This must be Linux ELF.
+ classpath/ltconfig:linux-gnu*)
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: # powerpc, because MkLinux only supported shared libraries with the
+ classpath/ltconfig: # most powerpc-linux boxes support dynamic linking these days and
+ classpath/ltconfig: # assume the GNU/Linux dynamic linker is in use.
+ classpath/ltconfig: dynamic_linker='GNU/Linux ld.so'
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: dynamic_linker='GNU/Linux ld.so'
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: version_type=linux
+ classpath/ltconfig: version_type=linux
+ classpath/ltmain.sh:# compiler flags: $LTCFLAGS
+ classpath/ltmain.sh: *-*-linux*)
+ classpath/ltmain.sh: darwin|linux|osf|windows|none)
+ classpath/ltmain.sh: # Like Linux, but with the current version available in
+ classpath/ltmain.sh: linux)
+ classpath/m4/lib-link.m4: dnl 2. if it's /usr/local/include and we are using GCC on Linux,
+ classpath/m4/lib-link.m4: linux* | gnu* | k*bsd*-gnu) haveit=yes;;
+ classpath/m4/lib-link.m4: dnl 2. if it's /usr/local/lib and we are using GCC on Linux,
+ classpath/m4/lib-link.m4: linux* | gnu* | k*bsd*-gnu) haveit=yes;;
+ classpath/m4/lib-prefix.m4: dnl 3. if it's /usr/local/include and we are using GCC on Linux,
+ classpath/m4/lib-prefix.m4: linux* | gnu* | k*bsd*-gnu) haveit=yes;;
+ classpath/m4/lib-prefix.m4: CPPFLAGS="${CPPFLAGS}${CPPFLAGS:+ }-I$additional_includedir"
+ classpath/m4/lib-prefix.m4: dnl 3. if it's /usr/local/lib and we are using GCC on Linux,
+ classpath/m4/lib-prefix.m4: linux*) haveit=yes;;
+ classpath/m4/lib-prefix.m4: LDFLAGS="${LDFLAGS}${LDFLAGS:+ }-L$additional_libdir"
+ classpath/m4/lib-prefix.m4: dnl On glibc systems, the current practice is that on a system supporting
+ classpath/native/jni/java-net/javanet.c: /* Not writable on Linux */
+ classpath/native/jni/java-nio/gnu_java_nio_VMChannel.c: * vector based read call (currently readv on Linux).
+ classpath/native/jni/java-nio/gnu_java_nio_VMChannel.c: * vector based read call (currently readv on Linux).
+ classpath/vm/reference/java/lang/VMProcess.java: // Linux use a process-per-thread model, which means the same thread
+
+ configure.ac: *-*-linux*)
+ configure.ac: AC_DEFINE(LINUX_THREADS, 1, [Define if using POSIX threads on Linux.])
+ include/config.h.in:/* Define if using POSIX threads on Linux. */
+ include/config.h.in:#undef LINUX_THREADS
+ include/posix-threads.h:# ifdef LOCK_DEBUG /* Assumes Linuxthreads */
+ include/posix-threads.h:#ifndef LINUX_THREADS
+ include/posix-threads.h:// pthread_mutex_destroy does nothing on Linux and it is a win to avoid
+ include/posix-threads.h:#endif /* LINUX_THREADS */
+ include/posix-threads.h: // For linux_threads this is really a pointer to its thread data
+ include/posix-threads.h:// E.g. on X86 Linux, pthread_self() is too slow for our purpose.
+ include/posix-threads.h:// This code should probably go away when Linux/X86 starts using a
+ posix-threads.cc:#if defined(LINUX_THREADS) || defined(FREEBSD_THREADS)
+ posix-threads.cc: // LinuxThreads (prior to glibc 2.1) usurps both SIGUSR1 and SIGUSR2.
+ posix-threads.cc:#else /* LINUX_THREADS */
+ posix-threads.cc:#endif /* LINUX_THREADS */
+ posix-threads.cc: // In older glibc's (prior to 2.1.3), the cond_wait functions may
+ posix-threads.cc: // glibc 2.1.3 doesn't set the value of `thread' until after start_routine
+
+ configure.ac: # We can save a little space at runtime if the mutex has m_count
+ configure.ac: # or __m_count. This is a nice hack for Linux.
+ configure.ac: AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]], [[
+ configure.ac: extern pthread_mutex_t *mutex; int q = mutex->m_count;
+
+ Makes sense to implement in our [[/libpthread]] ([[!taglink
+ open_issue_libpthread]])?
+
+ configure.ac: i?86-*-linux*)
+ configure.ac: SIGNAL_HANDLER=include/i386-signal.h
+ configure.ac: SIGNAL_HANDLER_AUX=include/x86_64-signal.h
+ include/i386-signal.h:// on an i386 based Linux system.
+ include/i386-signal.h: directly rather than via glibc. The sigaction structure that the
+ include/i386-signal.h: * called _directly_ by the kernel, because linuxthreads wraps signal
+ include/i386-signal.h: * handler to a linuxthreads wrapper, we will lose the PC adjustment
+ include/i386-signal.h: * Also, there may not be any unwind info in the linuxthreads
+
+ configure.ac: *-linux*)
+ configure.ac: host_os=linux;;
+
+ configure.host: i[34567]86*-linux* | \
+ configure.host: can_unwind_signal=yes
+ configure.host: libgcj_ld_symbolic='-Wl,-Bsymbolic'
+ configure.host: if test x$slow_pthread_self = xyes \
+ configure.host: [...]
+ configure.host: i[34567]86*-kfreebsd*-gnu | x86_64*-kfreebsd*-gnu)
+ configure.host: libgcj_ld_symbolic='-Wl,-Bsymbolic'
+ configure.host: slow_pthread_self=
+
+ java/lang/natObject.cc:// What follows currenly assumes a Linux-like platform.
+ java/lang/natObject.cc:// Some of it specifically assumes X86 or IA64 Linux, though that
+ java/lang/natObject.cc:# define INVALID_THREAD_ID 0 // Works for Linux?
+ java/lang/natObject.cc: const unsigned MIN_SLEEP_USECS = 2001; // Shorter times spin under Linux.
+ java/lang/natVMClassLoader.cc: // a module named (eg, on Linux) `lib-gnu-pkg-quux.so', followed
+
+ libltdl/acinclude.m4:x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
+ libltdl/acinclude.m4: x86_64-*linux*)
+ libltdl/acinclude.m4: ppc64-*linux*|powerpc64-*linux*)
+ libltdl/acinclude.m4: LD="${LD-ld} -m elf32ppclinux"
+ libltdl/acinclude.m4: s390x-*linux*)
+ libltdl/acinclude.m4: sparc64-*linux*)
+ libltdl/acinclude.m4: x86_64-*linux*)
+ libltdl/acinclude.m4: ppc*-*linux*|powerpc*-*linux*)
+ libltdl/acinclude.m4: s390*-*linux*)
+ libltdl/acinclude.m4: sparc*-*linux*)
+ libltdl/acinclude.m4: # Under GNU Hurd, this test is not required because there is
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4:# No shared lib support for Linux oldld, aout, or coff.
+ libltdl/acinclude.m4:linux*oldld* | linux*aout* | linux*coff*)
+ libltdl/acinclude.m4:# This must be Linux ELF.
+ libltdl/acinclude.m4:linux*)
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: # powerpc, because MkLinux only supported shared libraries with the
+ libltdl/acinclude.m4: # most powerpc-linux boxes support dynamic linking these days and
+ libltdl/acinclude.m4: # assume the GNU/Linux dynamic linker is in use.
+ libltdl/acinclude.m4: dynamic_linker='GNU/Linux ld.so'
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4: version_type=linux
+ libltdl/acinclude.m4:# This must be Linux ELF.
+ libltdl/acinclude.m4:linux*)
+ libltdl/acinclude.m4: linux*)
+ libltdl/acinclude.m4:linux*)
+ libltdl/acinclude.m4: linux*)
+ libltdl/acinclude.m4: # Linux and Compaq Tru64 Unix objects are PIC.
+ libltdl/acinclude.m4: # Linux and Compaq Tru64 Unix objects are PIC.
+ libltdl/acinclude.m4: linux*)
+ libltdl/acinclude.m4: linux*)
+ libltdl/acinclude.m4: gnu* | linux* | kfreebsd*-gnu | knetbsd*-gnu)
+ libltdl/acinclude.m4: # GNU and its variants, using gnu ld.so (Glibc)
+ libltdl/ltmain.sh: darwin|linux|osf|windows)
+ libltdl/ltmain.sh: # Like Linux, but with the current version available in
+ libltdl/ltmain.sh: linux)
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: version_type=linux
+ shlibpath.m4:# No shared lib support for Linux oldld, aout, or coff.
+ shlibpath.m4:linux*oldld* | linux*aout* | linux*coff*)
+ shlibpath.m4:# This must be Linux ELF.
+ shlibpath.m4:linux*|k*bsd*-gnu)
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: # powerpc, because MkLinux only supported shared libraries with the
+ shlibpath.m4: # most powerpc-linux boxes support dynamic linking these days and
+ shlibpath.m4: # assume the GNU/Linux dynamic linker is in use.
+ shlibpath.m4: dynamic_linker='GNU/Linux ld.so'
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: version_type=linux
+ shlibpath.m4: version_type=linux
+
+ testsuite/lib/libjava.exp: if { [regexp "linux" $target_triplet] } {
+
+ Adds `-specs=libgcj-test.spec`, which is created by `configure`. *This
+ spec file is read by gcj when linking. It is only used by the testing
+ harnesses (in libjava and gdb).* TODO. [[!taglink open_issue_gdb]].
+
+ * `libgcc`
+
+ TODO:
+
+ * `config/t-linux`
+ * `config/i386/t-linux`
+ * `config/i386/linux-unwind.h`
+
+ * `libitm`
+
+ TODO:
+
+ * `libitm/config/linux`
+
* `hurd/usr`
`NATIVE_SYSTEM_HEADER_DIR`, `638454a19c1c08f01c10517bc72a114250fc4f33`,
@@ -94,10 +321,13 @@ Last reviewed up to the [[Git mirror's 0479dc77cf50ee78769b55563051cf72d39b3d60
* Might `-fsplit-stack` be useful for us with respect to our
[[multithreaded|multithreading]] libraries?
- * `--enable-languages=[...]`
+ * `gcc/ada`, `gcc/testsuite/ada`, `gcc/testsuite/gnat.dg`, `gnattools`,
+ `libada` (not reviewed)
* [[Ada (GNAT)|GNAT]] support is work in progress.
+ * `gcc/go`, `gcc/testsuite/go.test`, `libgo` (not reviewed)
+
* The [[Google Go's libgo|gccgo]] (introduced in
e440a3286bc89368b8d3a8fd6accd47191790bf2 (2010-12-03)) needs
OS configuration / support.
@@ -153,8 +383,6 @@ Last reviewed up to the [[Git mirror's 0479dc77cf50ee78769b55563051cf72d39b3d60
buildable out of the box)? See also
73905b5de0d9a086f22ded7638bb1c0ae1b91326.
- * Various testsuite bits should include `*-*-gnu*`, too.
-
* [low] [[toolchain/cross-gnu]] toolchain bootstrap vs. `fenv.h` in libgcc's
libbid:
@@ -303,40 +531,18 @@ Last reviewed up to the [[Git mirror's 0479dc77cf50ee78769b55563051cf72d39b3d60
"20110609002620.GA16719@const.famille.thibault.fr"]]. commit
026e608ecebcb2a6193971006a85276307d79b00.
- * 549e2197b118efb2d947aaa15d445b05c1b5ed62 `Import the asan runtime library
- into GCC tree`. Linux-specific things:
- `ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX`, `ASAN_LINUX`, `ASAN_POSIX`,
- `libsanitizer/asan/asan_linux.cc`,
- `libsanitizer/asan/asan_malloc_linux.cc`,
- `libsanitizer/asan/asan_posix.cc`,
- `libsanitizer/interception/interception.h`,
- `libsanitizer/interception/interception_linux.cc`,
- `libsanitizer/interception/interception_linux.h`,
- `libsanitizer/sanitizer_common/sanitizer_allocator.cc`,
- `libsanitizer/sanitizer_common/sanitizer_linux.cc`,
- `libsanitizer/sanitizer_common/sanitizer_posix.cc`,
- `libsanitizer/sanitizer_common/sanitizer_procmaps.h`,
- `libsanitizer/sanitizer_common/sanitizer_symbolizer_linux.cc`.
- 4afab99bf0fe2d6905a9fa9d6ab886ca102312df `Enable libsanitizer just on x86
- linux for now`. 492e75a7336b4dbfe38207ea3abf8d5bd72376a9 `Move
- libsanitizer configure logic to subdirectory`.
- 6aea389d84c2172668af5f108e2b17e131120d0b `Add STATIC_LIBASAN_LIBS for
- -static-libasan`. Further commits later on.
-
- * 9cf754572854d9d9cd43c277eb7afb12e4911358 `Import tsan runtime from
- llvm`. Linux-specific things: `libsanitizer/tsan/tsan_platform.h`,
- `libsanitizer/tsan/tsan_platform_linux.cc`,
- `libsanitizer/tsan/tsan_symbolize_addr2line_linux.cc`.
- a96132f29aa3dfe94141a87537f62ea73ce0fc19 `Set TSAN_SUPPORTED=yes for
- x86_64/i686-linux for 64-bit multilib`. Further commits later on.
+ * [[`libsanitizer`|_san]] (not reviewed)
+
+ A lot of Linux-specific things.
+
+
# Build
Here's a log of a GCC build run; this is from our [[Git repository's
-dcdba5abca23716daa6aeb5c92f367e0978e4539 (2013-05-27;
-0479dc77cf50ee78769b55563051cf72d39b3d60 (2013-05-27)), plus
-`id:"87txlnlg0z.fsf@kepler.schwinge.homeip.net"`
+2a3496bebfe9d89f11d0b7a591afac55e11d5263 (2013-06-06;
+3a930d3fc68785662f5f3f4af02474cb21a62056 (2013-06-06))
sources|source_repositories/gcc]], run on kepler.SCHWINGE and coulomb.SCHWINGE.
$ export LC_ALL=C
@@ -349,10 +555,11 @@ sources|source_repositories/gcc]], run on kepler.SCHWINGE and coulomb.SCHWINGE.
Different hosts may default to different shells and compiler versions; thus
harmonized.
-We're stuck with GCC 4.6 until there are Debian *gnat-4.7* packages avaible.
+We're stuck with GCC 4.6 until there are Debian *gnat-4.7*/*gnat-4.8* packages
+available.
-This takes up around 3.5 GiB, and needs roughly 3.5 h on kepler.SCHWINGE and 15
-h on coulomb.SCHWINGE.
+This takes up around 3.5 GiB, and needs roughly 3.5 h on kepler.SCHWINGE and
+15.25 h on coulomb.SCHWINGE.
<!--
@@ -370,7 +577,9 @@ h on coulomb.SCHWINGE.
Addressed in Debian glibc.
- * `host-linux.c` vs. `host-default.c`
+ * `gcc/config/host-linux.c` vs. `host-default.c`
+
+ * `gcc/config/x-linux`
* *fixincludes* stuff
@@ -382,15 +591,16 @@ h on coulomb.SCHWINGE.
Comes from `gcc/config.gcc`: `i386/t-pmm_malloc` vs. `i386/t-gmm_malloc`
for `i[34567]86-*-linux*` vs. `i[34567]86-*-*`.
- * *libgomp*
+ * `libgomp`
- * `libgomp/config/linux/`, `libgomp/config/linux/x86`
+ * `libgomp/config/linux`, `libgomp/config/linux/x86`
- `sed`ed away.
+ `sed`ed away in `log_build*`. TODO.
- * `-ftls-model=initial-exec -march=i486 -mtune=i686`
+ * `-march=i486 -mtune=i686`
- `sed`ed away.
+ `sed`ed away in `log_build*`. This comes from `libgomp/configure.tgt`,
+ where this is added to `XCFLAGS` for `i[456]86-*-linux*` only. TODO?
* Missing `EOWNERDEAD`, `ENOTRECOVERABLE`. What're they used for?
@@ -592,10 +802,10 @@ coulomb.SCHWINGE:
$ make -k check-target 2>&1 | tee log_test_4_check-target
[...]
-This needs roughly 7.25 h on kepler.SCHWINGE and 3.5 h (`check-fixincludes`,
-`gcc/check-ada`) + 14 h (`gcc/check-c`) + 4.5 h (`gcc/check-c++`) + 6 h
+This needs roughly 7.5 h on kepler.SCHWINGE and 3.75 h (`check-fixincludes`,
+`gcc/check-ada`) + 14 h (`gcc/check-c`) + 4.5 h (`gcc/check-c++`) + 7.25 h
(`gcc/check-fortran`, `gcc/check-java`, `gcc/check-lto`, `gcc/check-objc`) +
-9.75 h (`check-intl`, [...], `check-lto-plugin`, `check-target`) = 37.75 h on
+10.25 h (`check-intl`, [...], `check-lto-plugin`, `check-target`) = 39.75 h on
coulomb.SCHWINGE.
@@ -611,34 +821,380 @@ coulomb.SCHWINGE.
TODO.
- * As of b401cb7ed15602d244a6807835b0b9d740a302a8 (2012-11-26;
- 769bf18a20ee2540ca7601cdafabd62b18b9751b (2012-10-01)), all
- `gcc.dg/guality` and `g++.dg/guality` and a few more are no longer tested
- on coulomb.SCHWINGE and kepler.SCHWINGE.
+ * Some are correctly UNSUPPORTED:
+
+ * [[IFUNC]]
+
+ Also multiversioning, `g++.dg/ext/mv*`, for example (several of which
+ started FAILing (ICE) on kepler.SCHWINGE).
+
+ * SSE2 (`sse2_runtime`)
+
+ `g++.dg/other/i386-1.C`, `g++.dg/other/pr40446.C`,
+ `g++.dg/other/pr49133.C`, `gcc.dg/compat/union-m128-1_main.c`,
+ `gcc.dg/compat/vector-1a_main.c`, `gcc.dg/compat/vector-2a_main.c`,
+ `gcc.dg/pr36584.c`, `gcc.dg/pr37544.c`, `gcc.dg/torture/pr16104-1.c`,
+ `gcc.dg/torture/pr35771-1.c`, `gcc.dg/torture/pr50444.c`,
+ `gcc.dg/torture/stackalign/alloca-2.c`,
+ `gcc.dg/torture/stackalign/alloca-3.c`,
+ `gcc.dg/torture/stackalign/push-1.c`,
+ `gcc.dg/torture/stackalign/vararg-3.c`, `gcc.target/i386/pr39315-2.c`,
+ `gcc.target/i386/pr39315-4.c`, `gcc.target/i386/pr44948-2a.c`,
+ `gcc.target/i386/pr46880.c`, `gcc.target/i386/pr52736.c`,
+ `gcc.target/i386/pr54703.c`, `gcc.target/i386/sse2-extract-1.c`,
+ several from `gfortran.fortran-torture`
+
+ * [[`asan.exp`|_san]]
+
+ * missing profiling C library (`-lc_p`)
+
+ `g++.old-deja/g++.law/profile1.C`, `gcc.dg/20021014-1.c`,
+ `gcc.dg/nest.c`, `gcc.dg/nested-func-4.c`, `gcc.dg/pr32450.c`,
+ `gcc.dg/pr43643.c`
+
+ * other C libraries
+
+ `gcc.target/i386/long-double-64-2.c`,
+ `gcc.target/i386/long-double-80-3.c`
+
+ * `gcc`
+
+ spawn [open ...]
+ FAIL: gcc.dg/split-2.c execution test
+
+ FAIL: gcc.dg/split-5.c execution test
+
+ TODO.
+
+ xgcc: internal compiler error: Aborted (program cc1)
+ libbacktrace could not find executable to open
+ Please submit a full bug report, [...]
+ FAIL: largefile.c -O0 -g -I. -Dwith_PCH (internal compiler error)
+ [...]
+
+ TODO.
+
+ * `g++`
+
+ spawn [open ...]
+ terminate called after throwing an instance of 'int'
+ FAIL: g++.dg/eh/sighandle.C -std=gnu++98 execution test
+
+ FAIL: g++.dg/eh/sighandle.C -std=gnu++11 execution test
+
+ TODO.
+
+ spawn [open ...]
+ FAIL: g++.dg/cdce3.C -std=gnu++98 execution test
+
+ FAIL: g++.dg/cdce3.C -std=gnu++11 execution test
+
+ TODO.
+
+ FAIL: g++.dg/tls/thread_local3.C -std=gnu++11 execution test
+ FAIL: g++.dg/tls/thread_local3g.C -std=gnu++11 execution test
+ FAIL: g++.dg/tls/thread_local4.C -std=gnu++11 execution test
+ FAIL: g++.dg/tls/thread_local4g.C -std=gnu++11 execution test
+ FAIL: g++.dg/tls/thread_local5.C -std=gnu++11 execution test
+ FAIL: g++.dg/tls/thread_local5g.C -std=gnu++11 execution test
+
+ They used to PASS, but FAIL as of
+ 769bf18a20ee2540ca7601cdafabd62b18b9751b..be3860ba8df48cca3253da4f02fd2d42d856ce80.
+ TODO.
+
+ -PASS: g++.dg/vect/pr36648.cc -std=c++98 execution test
+ -PASS: g++.dg/vect/pr36648.cc -std=c++11 execution test
+
+ On kepler.SCHWINGE, executables are generated (and run), on
+ coulomb.SCHWINGE only assembler code is generated. TODO. Likewise for
+ execution tests from `gcc.dg/vect` and `gfortran.dg/vect`.
+
+ * `gcc`, `g++`
+
+ FAIL: gcc.dg/cleanup-10.c execution test
+ FAIL: gcc.dg/cleanup-11.c execution test
+ FAIL: gcc.dg/cleanup-8.c execution test
+ FAIL: gcc.dg/cleanup-9.c execution test
+ FAIL: g++.dg/ext/cleanup-10.C -std=gnu++98 execution test
+ FAIL: g++.dg/ext/cleanup-10.C -std=gnu++11 execution test
+ FAIL: g++.dg/ext/cleanup-11.C -std=gnu++98 execution test
+ FAIL: g++.dg/ext/cleanup-11.C -std=gnu++11 execution test
+ FAIL: g++.dg/ext/cleanup-8.C -std=gnu++98 execution test
+ FAIL: g++.dg/ext/cleanup-8.C -std=gnu++11 execution test
+ FAIL: g++.dg/ext/cleanup-9.C -std=gnu++98 execution test
+ FAIL: g++.dg/ext/cleanup-9.C -std=gnu++11 execution test
+
+ TODO.
+
+ spawn [open ...]
+ gdb: took too long to attach
+ testcase [...]/gcc/testsuite/gcc.dg/guality/guality.exp completed in 16 seconds
+
+ spawn [open ...]
+ gdb: took too long to attach
+ testcase [...]/gcc/testsuite/g++.dg/guality/guality.exp completed in 20 seconds
- * As of b401cb7ed15602d244a6807835b0b9d740a302a8 (2012-11-26;
- 769bf18a20ee2540ca7601cdafabd62b18b9751b (2012-10-01)), there are
- regressions (FAILs) in libgomp execution tests on coulomb.SCHWINGE.
+ TODO. The gfortran ones worked fine.
- * 769bf18a20ee2540ca7601cdafabd62b18b9751b..be3860ba8df48cca3253da4f02fd2d42d856ce80
+ * `[ARCH]/libgomp`
- On GNU/Hurd:
+ As of dcdba5abca23716daa6aeb5c92f367e0978e4539 (2013-05-27;
+ 0479dc77cf50ee78769b55563051cf72d39b3d60 (2013-05-27)), plus
+ `id:"87txlnlg0z.fsf@kepler.schwinge.homeip.net"`, about a dozen of them
+ (but different ones per each run) FAIL on coulomb.SCHWINGE:
- Running [...]/hurd/master/gcc/testsuite/g++.dg/tls/tls.exp ...
- +FAIL: g++.dg/tls/thread_local3.C -std=gnu++11 execution test
- +FAIL: g++.dg/tls/thread_local3g.C -std=gnu++11 execution test
- +FAIL: g++.dg/tls/thread_local4.C -std=gnu++11 execution test
- +FAIL: g++.dg/tls/thread_local4g.C -std=gnu++11 execution test
- +FAIL: g++.dg/tls/thread_local5.C -std=gnu++11 execution test
- +FAIL: g++.dg/tls/thread_local5g.C -std=gnu++11 execution test
+ spawn [open ...]
+
+ Program aborted. Backtrace:
+ #0 0x1042523
+ #1 0x1043D6F
+ #2 0x10F9BC7
+ FAIL: libgomp.fortran/lib1.f90 -O1 execution test
- They used to PASS.
+ All have basically the same backtrace. TODO.
+
+ * `[ARCH]/libjava`
+
+ spawn [open ...]
+ Exception in thread "main" java.io.IOException: Invalid argument
+ at gnu.java.nio.channels.FileChannelImpl.write(natFileChannelImpl.cc:202)
+ at java.io.FileOutputStream.write(libgcj.so.14)
+ at java.io.DataOutputStream.write(libgcj.so.14)
+ at java.io.RandomAccessFile.write(libgcj.so.14)
+ at LargeFile.main(LargeFile.exe)
+ FAIL: LargeFile execution - source compiled test
+ UNTESTED: LargeFile output - source compiled test
+
+ FAIL: LargeFile -findirect-dispatch execution - source compiled test
+ UNTESTED: LargeFile -findirect-dispatch output - source compiled test
+ FAIL: LargeFile -O3 execution - source compiled test
+ UNTESTED: LargeFile -O3 output - source compiled test
+ FAIL: LargeFile -O3 -findirect-dispatch execution - source compiled test
+ UNTESTED: LargeFile -O3 -findirect-dispatch output - source compiled test
+
+ TODO.
+
+ spawn [open ...]
+ 1
+ FAIL: Throw_2 execution - source compiled test
+ UNTESTED: Throw_2 output - source compiled test
+
+ FAIL: Throw_2 -findirect-dispatch execution - source compiled test
+ UNTESTED: Throw_2 -findirect-dispatch output - source compiled test
+ FAIL: Throw_2 -O3 execution - source compiled test
+ UNTESTED: Throw_2 -O3 output - source compiled test
+ FAIL: Throw_2 -O3 -findirect-dispatch execution - source compiled test
+ UNTESTED: Throw_2 -O3 -findirect-dispatch output - source compiled test
+
+ TODO.
+
+ * `[ARCH]/libmudflap`
+
+ spawn [open ...]
+ FAIL: libmudflap.cth/pass37-frag.c (-O0) execution test
+ FAIL: libmudflap.cth/pass37-frag.c (-O0) output pattern test
+
+ FAIL: libmudflap.cth/pass37-frag.c (-O0) (rerun 1) execution test
+ FAIL: libmudflap.cth/pass37-frag.c (-O0) (rerun 1) output pattern test
+ [...]
+
+    TODO. Seems like not just timeouts (though reported before: [[!GCC_PR
+    20003]]). If GDB is to be believed, it seems like confusion between
+ libmudflap and glibc startup (while setting up the signal thread?):
+
+ #0 getenv (name=0x12dabee "LANGUAGE") at getenv.c:81
+ #1 0x011b2c78 in guess_category_value (categoryname=<optimized out>, category=<optimized out>) at dcigettext.c:1359
+ #2 __dcigettext (domainname=0x12dab1b <_libc_intl_domainname> "libc", msgid1=0x12e1cd8 "Error in unknown error system: ", msgid2=0x0, plural=0, n=0, category=5) at dcigettext.c:575
+ #3 0x011b1c53 in __dcgettext (domainname=0x12dab1b <_libc_intl_domainname> "libc", msgid=0x12e1cd8 "Error in unknown error system: ", category=5) at dcgettext.c:53
+ #4 0x01203728 in __strerror_r (errnum=-1, buf=0x15ff648 "", buflen=1024) at ../sysdeps/mach/_strerror.c:57
+ #5 0x011b0f30 in __assert_perror_fail (errnum=-1, file=0x1133969 "./pthread/cthreads-compat.c", line=45, function=0x1133985 <__PRETTY_FUNCTION__.5356> "cthread_fork") at assert-perr.c:62
+ #6 0x011324d4 in cthread_fork (func=0x118b0b0 <_hurd_msgport_receive>, arg=0x0) at ./pthread/cthreads-compat.c:45
+ #7 0x01192a96 in _hurdsig_init (intarray=0x102a000, intarraysize=5) at hurdsig.c:1499
+ #8 0x0117b9f8 in _hurd_new_proc_init (argv=0x15ffb88, intarray=0x102a000, intarraysize=5) at hurdinit.c:138
+ #9 0x0117bfef in _hurd_init (flags=8, argv=0x15ffb88, portarray=0x1029000, portarraysize=6, intarray=0x102a000, intarraysize=5) at hurdinit.c:94
+ #10 0x011a47c4 in init1 (argc=1, arg0=0x1025000 "/media/erich/home/thomas/tmp/gcc/hurd/master.build/i686-unknown-gnu0.3/libmudflap/testsuite/pass37-frag.exe") at ../sysdeps/mach/hurd/i386/init-first.c:136
+ #11 0x00001ec6 in _dl_start_user () from /lib/ld.so
+
+ pthread/cthreads-compat.c:
+
+ 38 cthread_t
+ 39 cthread_fork (cthread_fn_t func, void *arg)
+ 40 {
+ 41 pthread_t thread;
+ 42 int err;
+ 43
+ 44 err = pthread_create (&thread, NULL, func, arg);
+ 45 assert_perror (err);
+
+ Breakpoint 2, cthread_fork (func=0x118b0b0 <_hurd_msgport_receive>, arg=0x0) at ./pthread/cthreads-compat.c:44
+ 44 err = pthread_create (&thread, NULL, func, arg);
+ (gdb) info threads
+ Id Target Id Frame
+ * 4 Thread 17597.16 cthread_fork (func=0x118b0b0 <_hurd_msgport_receive>, arg=0x0) at ./pthread/cthreads-compat.c:44
+ (gdb) s
+ 40 {
+ (gdb)
+ 44 err = pthread_create (&thread, NULL, func, arg);
+ (gdb)
+
+ Breakpoint 1, pthread_create (thr=0x15ffa70, attr=0x0, start=0x118b0b0 <_hurd_msgport_receive>, arg=0x0) at ../../../master/libmudflap/mf-hooks3.c:272
+ 272 {
+ (gdb) s
+ 275 TRACE ("pthread_create\n");
+ (gdb)
+ 278 si = CALL_REAL (malloc, sizeof (*si));
+ (gdb) n
+ 279 si->user_fn = start;
+ (gdb)
+ 283 return CALL_REAL (pthread_create, thr, attr, __mf_pthread_spawner, si);
+ (gdb) s
+ 279 si->user_fn = start;
+ (gdb)
+ 280 si->user_arg = arg;
+ (gdb)
+ 283 return CALL_REAL (pthread_create, thr, attr, __mf_pthread_spawner, si);
+ (gdb)
+ 280 si->user_arg = arg;
+ (gdb)
+ 283 return CALL_REAL (pthread_create, thr, attr, __mf_pthread_spawner, si);
+ (gdb)
+ __mf_0fn_pthread_create (thr=thr@entry=0x15ffa70, attr=attr@entry=0x0, start=start@entry=0x1041070 <__mf_pthread_spawner>, arg=arg@entry=0x108e520 <__mf_0fn_bufs+12288>) at ../../../master/libmudflap/mf-hooks3.c:265
+ 265 }
+ (gdb) s
+ pthread_create (thr=0x15ffa70, attr=0x0, start=0x118b0b0 <_hurd_msgport_receive>, arg=0x0) at ../../../master/libmudflap/mf-hooks3.c:284
+ 284 }
+ (gdb) s
+ cthread_fork (func=0x118b0b0 <_hurd_msgport_receive>, arg=0x0) at ./pthread/cthreads-compat.c:45
+ 45 assert_perror (err);
+ (gdb) s
+ __assert_perror_fail (errnum=-1, file=0x1133969 "./pthread/cthreads-compat.c", line=45, function=0x1133985 <__PRETTY_FUNCTION__.5356> "cthread_fork") at assert-perr.c:55
+
+    Is this `libmudflap/mf-hooks3.c:__mf_0fn_pthread_create`, *a special
+    bootstrap variant* that indeed just returns `-1`?
+
+ * `[ARCH]/libstdc++-v3`
+
+ FAIL: libstdc++-abi/abi_check
+
+ TODO.
+
+ $ readelf --symbols --wide i686-unknown-gnu0.3/./libstdc++-v3/src/.libs/libstdc++.so | grep pthread_mutex
+ 1065: 00000000 0 FUNC WEAK DEFAULT UND pthread_mutex_unlock@GLIBC_2.13_DEBIAN_31 (37)
+ 2515: 00000000 0 FUNC WEAK DEFAULT UND pthread_mutex_lock@GLIBC_2.13_DEBIAN_31 (37)
+ 2978: 00068430 15 FUNC GLOBAL DEFAULT 11 _ZNSt12__basic_fileIcEC2EP15__pthread_mutex@@GLIBCXX_3.4
+ 3790: 00068430 15 FUNC GLOBAL DEFAULT 11 _ZNSt12__basic_fileIcEC1EP15__pthread_mutex@@GLIBCXX_3.4
+ 2085: 00000000 0 FUNC WEAK DEFAULT UND pthread_mutex_unlock@@GLIBC_2.13_DEBIAN_31
+ 3535: 00000000 0 FUNC WEAK DEFAULT UND pthread_mutex_lock@@GLIBC_2.13_DEBIAN_31
+ 3998: 00068430 15 FUNC GLOBAL DEFAULT 11 _ZNSt12__basic_fileIcEC2EP15__pthread_mutex
+ 4810: 00068430 15 FUNC GLOBAL DEFAULT 11 _ZNSt12__basic_fileIcEC1EP15__pthread_mutex
+
+ `_ZNSt12__basic_fileIcEC1EP15__pthread_mutex`
+ (`std::__basic_file<char>::__basic_file(__pthread_mutex*)`), but
+ `_ZNSt12__basic_fileIcEC2EP15pthread_mutex_t`
+ (`std::__basic_file<char>::__basic_file(pthread_mutex_t*)`) is expected.
+
+ FAIL: 22_locale/time_get/get_date/wchar_t/4.cc execution test
+ FAIL: 27_io/basic_filebuf/close/char/4879.cc execution test
+ FAIL: 27_io/basic_filebuf/close/char/9964.cc execution test
+ FAIL: 27_io/basic_filebuf/imbue/char/13171-2.cc execution test
+ FAIL: 27_io/basic_filebuf/imbue/wchar_t/14975-2.cc execution test
+ WARNING: program timed out.
+ FAIL: 27_io/basic_filebuf/open/char/9507.cc execution test
+ FAIL: 27_io/basic_filebuf/seekoff/char/26777.cc execution test
+ WARNING: program timed out.
+ FAIL: 27_io/basic_filebuf/showmanyc/char/9533-1.cc execution test
+ FAIL: 27_io/basic_filebuf/underflow/char/10097.cc execution test
+ FAIL: 27_io/objects/char/7.cc execution test
+ FAIL: 27_io/objects/char/9661-1.cc execution test
+ FAIL: 27_io/objects/wchar_t/7.cc execution test
+ FAIL: 27_io/objects/wchar_t/9661-1.cc execution test
+ FAIL: 30_threads/async/42819.cc execution test
+ FAIL: 30_threads/async/49668.cc execution test
+ FAIL: 30_threads/async/54297.cc execution test
+ FAIL: 30_threads/async/any.cc execution test
+ FAIL: 30_threads/async/async.cc execution test
+ FAIL: 30_threads/async/sync.cc execution test
+ FAIL: 30_threads/call_once/39909.cc execution test
+ FAIL: 30_threads/call_once/49668.cc execution test
+ FAIL: 30_threads/call_once/call_once1.cc execution test
+ FAIL: 30_threads/condition_variable/54185.cc execution test
+ FAIL: 30_threads/condition_variable_any/50862.cc execution test
+ FAIL: 30_threads/condition_variable_any/53830.cc execution test
+ FAIL: 30_threads/future/members/45133.cc execution test
+ FAIL: 30_threads/future/members/get.cc execution test
+ FAIL: 30_threads/future/members/get2.cc execution test
+ FAIL: 30_threads/future/members/share.cc execution test
+ FAIL: 30_threads/future/members/valid.cc execution test
+ FAIL: 30_threads/future/members/wait.cc execution test
+ FAIL: 30_threads/future/members/wait_for.cc execution test
+ FAIL: 30_threads/future/members/wait_until.cc execution test
+ FAIL: 30_threads/lock/2.cc execution test
+ FAIL: 30_threads/lock/4.cc execution test
+ FAIL: 30_threads/mutex/try_lock/2.cc execution test
+ FAIL: 30_threads/packaged_task/49668.cc execution test
+ FAIL: 30_threads/packaged_task/cons/3.cc execution test
+ FAIL: 30_threads/packaged_task/cons/alloc.cc execution test
+ FAIL: 30_threads/packaged_task/members/get_future.cc execution test
+ FAIL: 30_threads/packaged_task/members/invoke.cc execution test
+ FAIL: 30_threads/packaged_task/members/invoke2.cc execution test
+ FAIL: 30_threads/packaged_task/members/invoke3.cc execution test
+ FAIL: 30_threads/packaged_task/members/invoke4.cc execution test
+ FAIL: 30_threads/packaged_task/members/invoke5.cc execution test
+ FAIL: 30_threads/packaged_task/members/reset2.cc execution test
+ FAIL: 30_threads/promise/cons/alloc.cc execution test
+ FAIL: 30_threads/promise/cons/move.cc execution test
+ FAIL: 30_threads/promise/cons/move_assign.cc execution test
+ FAIL: 30_threads/promise/members/get_future.cc execution test
+ FAIL: 30_threads/promise/members/set_exception.cc execution test
+ FAIL: 30_threads/promise/members/set_exception2.cc execution test
+ FAIL: 30_threads/promise/members/set_value.cc execution test
+ FAIL: 30_threads/promise/members/set_value2.cc execution test
+ FAIL: 30_threads/promise/members/set_value3.cc execution test
+ FAIL: 30_threads/promise/members/swap.cc execution test
+ FAIL: 30_threads/shared_future/members/get.cc execution test
+ FAIL: 30_threads/shared_future/members/get2.cc execution test
+ FAIL: 30_threads/shared_future/members/valid.cc execution test
+ FAIL: 30_threads/shared_future/members/wait.cc execution test
+ FAIL: 30_threads/shared_future/members/wait_for.cc execution test
+ FAIL: 30_threads/shared_future/members/wait_until.cc execution test
+ FAIL: 30_threads/this_thread/3.cc execution test
+ FAIL: 30_threads/this_thread/4.cc execution test
+ FAIL: 30_threads/thread/cons/2.cc execution test
+ FAIL: 30_threads/thread/cons/3.cc execution test
+ FAIL: 30_threads/thread/cons/4.cc execution test
+ FAIL: 30_threads/thread/cons/49668.cc execution test
+ FAIL: 30_threads/thread/cons/5.cc execution test
+ FAIL: 30_threads/thread/cons/6.cc execution test
+ FAIL: 30_threads/thread/cons/7.cc execution test
+ FAIL: 30_threads/thread/cons/8.cc execution test
+ FAIL: 30_threads/thread/cons/9.cc execution test
+ FAIL: 30_threads/thread/cons/moveable.cc execution test
+ FAIL: 30_threads/thread/members/1.cc execution test
+ FAIL: 30_threads/thread/members/2.cc execution test
+ FAIL: 30_threads/thread/members/3.cc execution test
+ FAIL: 30_threads/thread/native_handle/cancel.cc execution test
+ FAIL: 30_threads/thread/swap/1.cc execution test
+ FAIL: 30_threads/timed_mutex/try_lock/2.cc execution test
+ FAIL: 30_threads/timed_mutex/try_lock_for/3.cc execution test
+ FAIL: 30_threads/timed_mutex/try_lock_until/2.cc execution test
+ FAIL: 30_threads/try_lock/2.cc execution test
+ FAIL: 30_threads/try_lock/4.cc execution test
+
+    TODO. Perhaps these are just timeouts? [[!message-id
+    "200609052027.NAA09861@hpsje.cup.hp.com"]], [[!message-id
+    "1227217275.6205.6.camel@janis-laptop"]]. If needed, DejaGnu's
+    `remote.exp:remote_wait` could be re-implemented in GCC so as to ignore
+    its `timeout` parameter, which DejaGnu code often passes as a hard-coded
+    value that we may want to override (or is that what
+    `gcc/testsuite/lib/timeout.exp:standard_wait` is for?). While at it:
+    `libmudflap/testsuite/libmudflap.c++/ctors.exp` and
+    `libmudflap/testsuite/libmudflap.c/externs.exp` use hard-coded timeout
+    values in their `remote_wait` calls (also, why don't these use the usual
+    way of running tests?).
* What is `gcc/testsuite/gcc.test-framework/test-framework.exp` and should we
define `CHECK_TEST_FRAMEWORK` to run these tests?
- * TODO
-
## Enhancements
diff --git a/open_issues/gccgo.mdwn b/open_issues/gccgo.mdwn
index 42502c67..0861d27f 100644
--- a/open_issues/gccgo.mdwn
+++ b/open_issues/gccgo.mdwn
@@ -38,6 +38,15 @@ been working on this, has some (unpublished) patches, and this was blocked on
[[`getcontext`/`makecontext`/`setcontext`/`swapcontext`|glibc]].
+### IRC, freenode, #hurd, 2013-08-26
+
+ < gnu_srs> tschwinge: on
+ http://www.gnu.org/software/hurd/open_issues/gccgo.html you might change
+ the text, my patches are published
+ < gnu_srs> http://lists.gnu.org/archive/html/bug-hurd/2013-06/msg00050.html
+ to msg00052.html
+
+
## `getcontext`/`makecontext`/`setcontext`/`swapcontext` usage analysis
In context of [[glibc/t/tls-threadvar]]. Looking at GCC trunk commit
diff --git a/open_issues/gdb.mdwn b/open_issues/gdb.mdwn
index 62d9e435..07b3da45 100644
--- a/open_issues/gdb.mdwn
+++ b/open_issues/gdb.mdwn
@@ -27,14 +27,14 @@ Here's what's to be done for maintaining GNU GDB.
<!--
git checkout reviewed
-git diff --patience --stat=$COLUMNS,$COLUMNS --patch --src-prefix=./ --dst-prefix=./ --find-renames --ignore-space-change ..sourceware/master | awk '/^diff/ { c = $0; } /^@@/ { print c; } { print; }' | less
+git diff --patience --stat=$COLUMNS,$COLUMNS --patch --src-prefix=./ --dst-prefix=./ --find-renames --ignore-space-change ..sourceware/master | awk '/^diff/ { c = " " $0; } /^@@/ { print c; } { print; }' | less
-i
/^---.*/([^.]*|.*\.texi.*|[^/]*gnu[^/]*)$|hurd|linux|nacl|nptl|glibc|gs:
-->
-Last reviewed up to the [[Git mirror's 3eb2a51c46804f0be530893e94af559abee8b4f8
-(2013-05-27) sources|source_repositories/gdb]].
+Last reviewed up to the [[Git mirror's d19fd090b7ec0a60846c52cad9fc0c24c3729565
+(2013-09-18) sources|source_repositories/gdb]].
* Globally
@@ -66,16 +66,23 @@ Last reviewed up to the [[Git mirror's 3eb2a51c46804f0be530893e94af559abee8b4f8
* Configure so that Debian system's `/usr/lib/debug/[...]` will be loaded
automatically.
+ * old-style function definition
+
+ ../../Ferry_Tagscherer/gdb/gnu-nat.c: In function 'trace_me':
+ ../../Ferry_Tagscherer/gdb/gnu-nat.c:2106:8: warning: old-style function definition [-Wold-style-definition]
+ void trace_me ()
+ ^
+
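+    The warning is about the pre-ANSI empty parameter list; presumably the
+    fix is simply to spell out `void`. A minimal illustration (not the
+    actual `gnu-nat.c` change):
+
+        /* Before: K&R-style definition; the empty parameter list leaves
+           the parameters unspecified (-Wold-style-definition).  */
+        void trace_me () { }
+
+        /* After: prototyped definition, explicitly taking no arguments.  */
+        void trace_me (void) { }
+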
# Build
Here's a log of a GDB build run; this is from our [[Git
repository|source_repositories/gdb]]'s `tschwinge/Ferry_Tagscherer` branch,
-commit 3eb2a51c46804f0be530893e94af559abee8b4f8 (2013-05-27), run on
+commit d19fd090b7ec0a60846c52cad9fc0c24c3729565 (2013-09-18), run on
kepler.SCHWINGE and coulomb.SCHWINGE.
$ export LC_ALL=C
- $ ../Ferry_Tagscherer/configure --prefix="$PWD".install SHELL=/bin/dash CC=gcc-4.7 CXX=g++-4.7 --disable-werror 2>&1 | tee log_build
+ $ ../Ferry_Tagscherer/configure --prefix="$PWD".install SHELL=/bin/dash CC=gcc-4.8 CXX=g++-4.8 --disable-werror 2>&1 | tee log_build
[...]
$ make 2>&1 | tee log_build_
[...]
@@ -87,7 +94,7 @@ There are several occurences of *error: dereferencing type-punned pointer will
break strict-aliasing rules* in the MIG-generated stub files; thus no `-Werror`
until that is resolved ([[strict_aliasing]]).
-This takes up around 220 MiB and needs roughly 8 min on kepler.SCHWINGE and 25
+This takes up around 230 MiB and needs roughly 8 min on kepler.SCHWINGE and 31
min on coulomb.SCHWINGE.
<!--
@@ -168,11 +175,6 @@ formats and more emulation vectors.
exc_request_S.c:157:24: warning: no previous prototype for 'exc_server' []
exc_request_S.c:193:28: warning: no previous prototype for 'exc_server_routine' []
- * `dlopen`/`-ldl`
-
- -checking for library containing dlopen... none required
- +checking for library containing dlopen... -ldl
-
* `O_NOFOLLOW`
First seen in
@@ -183,8 +185,6 @@ formats and more emulation vectors.
[[!taglink open_issue_glibc]]?
- * Why does GNU/Linux have an additional `-ldl -rdynamic` when linking `gdb`?
-
* 00e1c4298d2b6fe040a9a970e98349602b12ddbf..6b25dae901ddedb3f330803542d3eac73cdcae4b:
+../../Ferry_Tagscherer/gdb/gnu-nat.c: In function 'info_port_rights':
@@ -200,7 +200,7 @@ formats and more emulation vectors.
$ make install 2>&1 | tee log_install
[...]
-This takes up around 50 MiB, and needs roughly 1 min on kepler.SCHWINGE and 2
+This takes up around 50 MiB, and needs roughly 1 min on kepler.SCHWINGE and 3
min on coulomb.SCHWINGE.
@@ -216,7 +216,7 @@ min on coulomb.SCHWINGE.
$ make -k check 2>&1 | tee log_test
[...]
-This needs roughly 14 min on kepler.SCHWINGE and 70 min on coulomb.SCHWINGE.
+This needs roughly 15 min on kepler.SCHWINGE and 72 min on coulomb.SCHWINGE.
When running `make -k check 2>&1 | tee log_test`, at the end of the testsuite
the `tee` process does not terminate if there are still stray leftover
@@ -229,6 +229,12 @@ GNU/Hurd, these generally are `gdb.multi/watchpoint-multi`, and an unknown
## Analysis
+The testsuite uses the system's default `gcc` (and similar) compilers, not
+those specified on the `configure` line ([[!taglink open_issue_gdb]]?); see
+the `find_gcc` (and similar) usage in the testsuite and DejaGnu. Maybe
+something like `gdb/testsuite/boards/cc-with-tweaks.exp` would help, or
+setting `CC_FOR_TARGET` (and similar) per `gdb/testsuite/lib/future.exp`?
+
$ toolchain/logs/process gdb test
* `gdb.base/attach-pie-misread.exp`
@@ -289,6 +295,15 @@ GNU/Hurd, these generally are `gdb.multi/watchpoint-multi`, and an unknown
At this point, the system hangs; no new processes can be spawned, so
perhaps an issue with the exec server.
+ * `gdb.threads/manythreads.exp`
+
+ [[!taglink open_issue_libpthread]]. Perhaps fails due to pthread
+ attributes usage? Doesn't execute properly:
+
+ $ gdb/testsuite/gdb.threads/manythreads
+ manythreads: ../libpthread/sysdeps/mach/pt-thread-halt.c:51: __pthread_thread_halt: Unexpected error: (ipc/rcv) invalid name.
+ Killed
+
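+    For reference, "pthread attributes usage" means something like the
+    following sketch (a guess at the trigger, not the actual
+    `manythreads.c`): creating threads with a non-default, explicitly sized
+    stack, which the current threadvars-based stack setup is known to
+    dislike.
+
+        #include <pthread.h>
+        #include <stdio.h>
+        #include <string.h>
+
+        static void *
+        worker (void *arg)
+        {
+          (void) arg;
+          return NULL;
+        }
+
+        int
+        main (void)
+        {
+          pthread_attr_t attr;
+          pthread_t t;
+          int err;
+
+          /* An explicit stack size is the kind of attribute usage meant.  */
+          pthread_attr_init (&attr);
+          pthread_attr_setstacksize (&attr, 1 << 16);
+          err = pthread_create (&t, &attr, worker, NULL);
+          if (err != 0)
+            fprintf (stderr, "pthread_create: %s\n", strerror (err));
+          else
+            pthread_join (t, NULL);
+          pthread_attr_destroy (&attr);
+          return 0;
+        }
+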
* Linux syscall usage, `<asm/unistd.h>`
* `UNSUPPORTED: gdb.threads/ia64-sigill.exp: Couldn't compile ../../../master/gdb/testsuite/gdb.threads/ia64-sigill.c: unrecognized error`
@@ -347,6 +362,18 @@ GNU/Hurd, these generally are `gdb.multi/watchpoint-multi`, and an unknown
Cannot access memory at address 0x6c62616e
(gdb) testcase ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/attach-pie-noexec.exp completed in 3 seconds
+ IRC, freenode, #hurd, 2013-09-06:
+
+ <gnu_srs1> How to debug a program that works in the shell but Cannot
+ access memory at address ... in gdb?
+ <tschwinge> Build it without -pie -- but that is just a guess of what
+ might be going on.
+ * tschwinge clearly has spent enough time with obscure things to be
+ able to make such guesses.
+ <gnu_srs1> tschwinge: looks like -fPIE is used.
+ <gnu_srs1> verified: some (all?) executables compiled with -fPIE, -fpie
+ and linked with -pie cannot be debugged in gdb :(
+
* `solib-event stop`
Running ../../../Ferry_Tagscherer/gdb/testsuite/gdb.mi/mi-catch-load.exp ...
@@ -363,15 +390,203 @@ GNU/Hurd, these generally are `gdb.multi/watchpoint-multi`, and an unknown
*stopped,reason="signal-received",signal-name="SIGSEGV",signal-meaning="Segmentation fault",frame={addr="0x00014add",func="??",args=[],from="/lib/ld.so"},thread-id="4",stopped-threads="all"
- * `gdb.arch/i386-float.exp: info float`
+ * `gdb.base/call-signal-resume.exp`
+
+ $ gdb -q gdb/testsuite/gdb.base/call-signals
+ (gdb) break stop_one
+ (gdb) r
+ (gdb) call gen_signal()
+ (gdb) bt
+ (gdb) frame [<function called from gdb>]
+ (gdb) return
+ (gdb) break handle_signal
+ (gdb) c
+ (gdb) c
+
+ kepler.SCHWINGE:
+
+ Breakpoint 2, handle_signal (sig=6) at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/call-signals.c:28
+ 28 }
+ (gdb) bt
+ #0 handle_signal (sig=6) at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/call-signals.c:28
+ #1 <signal handler called>
+ #2 0xb7fde416 in __kernel_vsyscall ()
+ #3 0xb7dffd96 in kill () at ../sysdeps/unix/syscall-template.S:81
+ #4 0x0804859c in gen_signal () at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/call-signals.c:35
+ #5 0x08048610 in main () at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/call-signals.c:81
+
+ coulomb.SCHWINGE:
+
+ Breakpoint 2, handle_signal (sig=6) at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/call-signals.c:28
+ 28 }
+ (gdb) bt
+ #0 handle_signal (sig=6) at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/call-signals.c:28
+ #1 0x010baac2 in trampoline () from /lib/i386-gnu/libc.so.0.3
+ #2 0x00000006 in ?? ()
+ #3 0x00000000 in ?? ()
+
+ kepler.SCHWINGE:
+
+ (gdb) c
+ Continuing.
+ no signal
+ [Inferior 1 (process 10401) exited normally]
+
+ coulomb.SCHWINGE:
+
+ (gdb) c
+ Continuing.
+ no signal
+
+ Program received signal SIGSEGV, Segmentation fault.
+ 0x00000000 in ?? ()
+ (gdb) bt
+ #0 0x00000000 in ?? ()
+ #1 0x01116c28 in _IO_acquire_lock_fct (p=<synthetic pointer>) at libioP.h:905
+ #2 _IO_puts (str=0x80487e0 "no signal") at ioputs.c:45
+ #3 0x080486d8 in gen_signal () at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/call-signals.c:38
+ #4 0x0804873d in main () at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/call-signals.c:81
+
+    This is apparently new with the glibc 2.17 upgrade. Without the manual
+    `gen_signal` call, it works fine. TODO.
+
+ * `gdb.base/relativedebug.exp`
+
+ (gdb) PASS: gdb.base/relativedebug.exp: continue
+ bt
+ #0 0x010a1afc in ?? () from /lib/i386-gnu/libc.so.0.3
+ #1 0x010a23be in mach_msg () from /lib/i386-gnu/libc.so.0.3
+ #2 0x0126cd98 in msg_sig_post () from /lib/i386-gnu/libhurduser.so.0.3
+ #3 0x010e2141 in ?? () from /lib/i386-gnu/libc.so.0.3
+ #4 0x010e23ed in kill () from /lib/i386-gnu/libc.so.0.3
+ #5 0x010e17f4 in raise () from /lib/i386-gnu/libc.so.0.3
+ #6 0x010e5b7c in abort () from /lib/i386-gnu/libc.so.0.3
+ #7 0x08048607 in handler (signo=14) at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/relativedebug.c:25
+ #8 0x010bdac2 in ?? () from /lib/i386-gnu/libc.so.0.3
+ Backtrace stopped: previous frame inner to this frame (corrupt stack?)
+ (gdb) FAIL: gdb.base/relativedebug.exp: pause found in backtrace
+
+ This is apparently new with the glibc 2.17 upgrade. Previously it said:
+
+ (gdb) PASS: gdb.base/relativedebug.exp: continue
+ bt
+ #0 0x0107c85c in ?? () from /lib/i386-gnu/libc.so.0.3
+ #1 0x0107d069 in mach_msg () from /lib/i386-gnu/libc.so.0.3
+ #2 0x01220d4f in msg_sig_post () from /lib/i386-gnu/libhurduser.so.0.3
+ #3 0x010bb683 in ?? () from /lib/i386-gnu/libc.so.0.3
+ #4 0x010bb8f6 in kill () from /lib/i386-gnu/libc.so.0.3
+ #5 0x010bad76 in raise () from /lib/i386-gnu/libc.so.0.3
+ #6 0x010bf029 in abort () from /lib/i386-gnu/libc.so.0.3
+ #7 0x08048597 in handler (signo=14) at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/relativedebug.c:25
+ #8 0x01098282 in ?? () from /lib/i386-gnu/libc.so.0.3
+ #9 0x010bbe5a in sigsuspend () from /lib/i386-gnu/libc.so.0.3
+ #10 0x0112fee1 in pause () from /lib/i386-gnu/libc.so.0.3
+ #11 0x080485c5 in main () at ../../../Ferry_Tagscherer/gdb/testsuite/gdb.base/relativedebug.c:32
+ (gdb) PASS: gdb.base/relativedebug.exp: pause found in backtrace
+
+ TODO.
+
+ * `gdb.gdb/selftest.exp`
+
+ (gdb) PASS: gdb.gdb/selftest.exp: send SIGINT signal to child process
+ backtrace
+ #0 0x0146fafc in ?? () from /lib/i386-gnu/libc.so.0.3
+ #1 0x014703be in mach_msg () from /lib/i386-gnu/libc.so.0.3
+ #2 0x0163bd98 in msg_sig_post () from /lib/i386-gnu/libhurduser.so.0.3
+ #3 0x014b0141 in ?? () from /lib/i386-gnu/libc.so.0.3
+ #4 0x014b03ed in kill () from /lib/i386-gnu/libc.so.0.3
+ #5 0x082cf471 in _rl_handle_signal (sig=2) at ../../Ferry_Tagscherer/readline/signals.c:221
+ #6 0x0148bac2 in ?? () from /lib/i386-gnu/libc.so.0.3
+ Backtrace stopped: previous frame inner to this frame (corrupt stack?)
+ (gdb) FAIL: gdb.gdb/selftest.exp: backtrace through signal handler
+
+ This is apparently new with the glibc 2.17 upgrade. Previously it said:
+
+ (gdb) PASS: gdb.gdb/selftest.exp: send SIGINT signal to child process
+ backtrace
+ #0 0x0144885c in ?? () from /lib/i386-gnu/libc.so.0.3
+ #1 0x01449069 in mach_msg () from /lib/i386-gnu/libc.so.0.3
+ #2 0x015ecd4f in msg_sig_post () from /lib/i386-gnu/libhurduser.so.0.3
+ #3 0x01487683 in ?? () from /lib/i386-gnu/libc.so.0.3
+ #4 0x014878f6 in kill () from /lib/i386-gnu/libc.so.0.3
+ #5 0x082cf401 in _rl_handle_signal (sig=2) at ../../Ferry_Tagscherer/readline/signals.c:221
+ #6 0x01464282 in ?? () from /lib/i386-gnu/libc.so.0.3
+ #7 0x0144fce3 in ?? () from /lib/i386-gnu/libc.so.0.3
+ #8 0x0153975b in poll () from /lib/i386-gnu/libc.so.0.3
+ #9 0x081c91c2 in gdb_wait_for_event (block=1) at ../../Ferry_Tagscherer/gdb/event-loop.c:804
+ #10 0x081c998f in gdb_do_one_event () at ../../Ferry_Tagscherer/gdb/event-loop.c:402
+ #11 0x081c9b07 in start_event_loop () at ../../Ferry_Tagscherer/gdb/event-loop.c:431
+ #12 0x081c2f42 in captured_command_loop (data=data@entry=0x0) at ../../Ferry_Tagscherer/gdb/main.c:260
+ #13 0x081c0e57 in catch_errors (func=func@entry=0x81c2f30 <captured_command_loop>, func_args=func_args@entry=0x0, errstring=errstring@entry=0x83
+ 5b81b "", mask=mask@entry=6) at ../../Ferry_Tagscherer/gdb/exceptions.c:546
+ #14 0x081c388c in captured_main (data=data@entry=0x19ff150) at ../../Ferry_Tagscherer/gdb/main.c:1055
+ #15 0x081c0e57 in catch_errors (func=func@entry=0x81c3130 <captured_main>, func_args=func_args@entry=0x19ff150, errstring=errstring@entry=0x835b
+ 81b "", mask=mask@entry=6) at ../../Ferry_Tagscherer/gdb/exceptions.c:546
+ #16 0x081c43c0 in gdb_main (args=0x19ff150) at ../../Ferry_Tagscherer/gdb/main.c:1064
+ #17 0x08099533 in main (argc=5, argv=0x19ff1e8) at ../../Ferry_Tagscherer/gdb/gdb.c:34
+ (gdb) PASS: gdb.gdb/selftest.exp: backtrace through signal handler
+
+ TODO.
+
+ * `gdb.python/python.exp`
+
+ >PASS: gdb.python/python.exp: post event insertion - gdb.post_event(Foo())
+ end
+ ERROR: Process no longer exists
+ UNRESOLVED: gdb.python/python.exp: post event insertion - end
+
+ This is apparently new with the glibc 2.17 upgrade. Previously it said:
+
+ >PASS: gdb.python/python.exp: post event insertion - gdb.post_event(Foo())
+ end
+ (gdb) PASS: gdb.python/python.exp: post event insertion - end
+ [More PASSes.]
+
+ TODO.
+
+ * `gdb.base/restore.exp`, `gdb.base/store.exp`
+
+ Several FAILs, starting with GCC 4.8 usage:
+
+ (gdb) PASS: gdb.base/restore.exp: caller3 calls callee1; return callee now
+ print l1
+ $16 = <optimized out>
+ (gdb) FAIL: gdb.base/restore.exp: caller3 calls callee1; return restored l1 to 32492
+
+ [[!GCC_PR 55056]], [[!message-id
+ "20130126202645.GA4888@host2.jankratochvil.net"]], and maybe [[!message-id
+ "CAO2gOZXvCLdaKE2=ZKpjGVGq8A0wQ94-AUo7eKvvWHWncrU_yg@mail.gmail.com"]] look
+ related.
- Running ../../../Ferry_Tagscherer/gdb/testsuite/gdb.arch/i386-float.exp ...
- PASS: gdb.arch/i386-float.exp: first stepi
- FAIL: gdb.arch/i386-float.exp: info float
- PASS: gdb.arch/i386-float.exp: second stepi
- PASS: gdb.arch/i386-float.exp: info float
+TODO.
- Only fails for GNU/Hurd: the floating point stack initially is not
- all-zeroes, which is expected, and which it is on GNU/Linux.
-TODO.
+# Open Issues
+
+## [[tag/open_issue_gdb]]
+
+## `info files` SIGSEGV
+
+[[!tag open_issue_gdb]]
+
+
+### IRC, freenode, #hurd, 2013-09-07
+
+ <rekado> I'm trying to debug pfinet, but I'm not very familiar with gdb.
+ Tried to attach to the running pfinet process (built with debug symbols),
+ set a breakpoint and ... when I ran "info files" the process segfaulted.
+ <teythoon> which process segfaults, pfinet or gdb?
+ <rekado> gdb segfaults.
+
+
+## Watchpoints
+
+[[!tag open_issue_gdb]]
+
+
+### IRC, freenode, #hurd, 2013-09-16
+
+ <gnu_srs> tschwinge: Is gdb watch known to fail on hurd? It hangs for me
+ when logged in via ssh.
+ <tschwinge> gnu_srs: Don't know about GDB's watch command. Are you sure it
+ is hanging?
diff --git a/open_issues/gdb_gcore.mdwn b/open_issues/gdb_gcore.mdwn
index 69211ac0..cadd9be1 100644
--- a/open_issues/gdb_gcore.mdwn
+++ b/open_issues/gdb_gcore.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2009, 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2009, 2011, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -22,5 +23,8 @@ GDB's `gcore` command doesn't work / needs to be implemented / ported in GDB:
Undefined command: "gcore". Try "help".
gcore: failed to create core.8371
+Will probably need to implement `gdb/gdbarch.sh:gdb_signal_from_target`,
+`gdb/gdbarch.sh:gdb_signal_to_target`.
+
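+As a rough sketch (with invented stand-in names; the real hooks are generated
+from `gdb/gdbarch.sh` and installed per architecture), these boil down to a
+pair of translations between host signal numbers and GDB's portable signal
+enumeration:
+
+    #include <signal.h>
+
+    /* Illustrative stand-in for GDB's own signal enumeration.  */
+    enum sketch_gdb_signal
+      {
+        SKETCH_SIGNAL_0,
+        SKETCH_SIGNAL_HUP,
+        SKETCH_SIGNAL_INT,
+        SKETCH_SIGNAL_UNKNOWN
+      };
+
+    enum sketch_gdb_signal
+    sketch_signal_from_target (int signo)
+    {
+      switch (signo)
+        {
+        case 0:      return SKETCH_SIGNAL_0;
+        case SIGHUP: return SKETCH_SIGNAL_HUP;
+        case SIGINT: return SKETCH_SIGNAL_INT;
+        default:     return SKETCH_SIGNAL_UNKNOWN;
+        }
+    }
+
+    int
+    sketch_signal_to_target (enum sketch_gdb_signal sig)
+    {
+      switch (sig)
+        {
+        case SKETCH_SIGNAL_HUP: return SIGHUP;
+        case SKETCH_SIGNAL_INT: return SIGINT;
+        default:                return -1;
+        }
+    }
+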
If someone is working in this area, they may want to port
<http://code.google.com/p/google-coredumper/>, too.
diff --git a/open_issues/gdb_signal_handler.mdwn b/open_issues/gdb_signal_handler.mdwn
new file mode 100644
index 00000000..3084f7e3
--- /dev/null
+++ b/open_issues/gdb_signal_handler.mdwn
@@ -0,0 +1,403 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!tag open_issue_gdb open_issue_glibc]]
+
+
+# IRC, freenode, #hurd, 2013-07-07
+
+ <zyg> Hi, I'm in GDB inside a handler for SIGHUP, after stepping out, gdb
+ will hang on instruction: <_hurd_sigstate_lock+88>: xchg
+ %edx,0x4(%eax)
+ <zyg> here is my signal test pasted: http://pastebin.com/U72qw3FC
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <signal.h>
+
+ void *
+ my_handler(int signal, void *info, void *context)
+ {
+ printf("got SIGHUP\n");
+ return NULL;
+ }
+
+ void
+ install_handler (int signal)
+ {
+ struct sigaction sa;
+ sa.sa_sigaction = my_handler;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaction(signal, &sa, NULL);
+ }
+
+ void test_sighup(void)
+ {
+ raise(SIGHUP);
+ }
+
+ int main(int argc, char **argv){
+ install_handler(SIGHUP);
+ test_sighup();
+ exit(1);
+ }
+ <braunr> zyg: thanks
+ <braunr> zyg: what is the problem exactly ?
+ <braunr> zyg: i mean, does it hand before attaching with gdb ?
+ <zyg> braunr: it doesn't hang if runned without gdb. I've pasted here when
+ I step out of the handler, and get to the hanging instruction:
+ http://pastebin.com/nUyCx6Wj
+ $ gdb --args a.out
+ GNU gdb (GDB) 7.6-debian
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
+ This is free software: you are free to change and redistribute it.
+ There is NO WARRANTY, to the extent permitted by law. Type "show copying"
+ and "show warranty" for details.
+ This GDB was configured as "i486-gnu".
+ For bug reporting instructions, please see:
+ <http://www.gnu.org/software/gdb/bugs/>...
+ Reading symbols from /home/shrek/a.out...(no debugging symbols found)...done.
+ (gdb)
+
+ (gdb) display/i $pc
+ (gdb) handle SIGHUP pass stop print
+ Signal Stop Print Pass to program Description
+ SIGHUP Yes Yes Yes Hangup
+ (gdb)
+
+ (gdb) run
+ Starting program: /home/shrek/a.out
+ [New Thread 3571.5]
+
+ Program received signal SIGHUP, Hangup.
+ 0x010548ec in mach_msg_trap ()
+ at /build/buildd-eglibc_2.17-6-hurd-i386-g946kE/eglibc-2.17/build-tree/hurd-i386-libc/mach/mach_msg_trap.S:2
+ 2 /build/buildd-eglibc_2.17-6-hurd-i386-g946kE/eglibc-2.17/build-tree/hurd-i386-libc/mach/mach_msg_trap.S: No such file or directory.
+ 1: x/i $pc
+ => 0x10548ec <mach_msg_trap+12>: ret
+ (gdb)
+
+ (gdb) si
+ 0x0804862d in my_handler ()
+ 1: x/i $pc
+ => 0x804862d <my_handler>: push %ebp
+ (gdb) x/20xi 0x804862d
+ => 0x804862d <my_handler>: push %ebp
+ 0x804862e <my_handler+1>: mov %esp,%ebp
+ 0x8048630 <my_handler+3>: sub $0x18,%esp
+ 0x8048633 <my_handler+6>: movl $0x8048750,(%esp)
+ 0x804863a <my_handler+13>: call 0x8048500 <puts@plt>
+ 0x804863f <my_handler+18>: mov $0x0,%eax
+ 0x8048644 <my_handler+23>: leave
+ 0x8048645 <my_handler+24>: ret
+ 0x8048646 <install_handler>: push %ebp
+ 0x8048647 <install_handler+1>: mov %esp,%ebp
+ 0x8048649 <install_handler+3>: sub $0x28,%esp
+ 0x804864c <install_handler+6>: movl $0x804862d,-0x14(%ebp)
+ 0x8048653 <install_handler+13>: movl $0x40,-0xc(%ebp)
+ 0x804865a <install_handler+20>: lea -0x14(%ebp),%eax
+ 0x804865d <install_handler+23>: add $0x4,%eax
+ 0x8048660 <install_handler+26>: mov %eax,(%esp)
+ 0x8048663 <install_handler+29>: call 0x80484d0 <sigemptyset@plt>
+ 0x8048668 <install_handler+34>: movl $0x0,0x8(%esp)
+ 0x8048670 <install_handler+42>: lea -0x14(%ebp),%eax
+ 0x8048673 <install_handler+45>: mov %eax,0x4(%esp)
+ (gdb)
+
+ (gdb) break *0x804863f
+ Breakpoint 1 at 0x804863f
+ (gdb) c
+ Continuing.
+ got SIGHUP
+
+ Breakpoint 1, 0x0804863f in my_handler ()
+ 1: x/i $pc
+ => 0x804863f <my_handler+18>: mov $0x0,%eax
+ (gdb)
+
+ (gdb) si
+ 0x08048644 in my_handler ()
+ 1: x/i $pc
+ => 0x8048644 <my_handler+23>: leave
+ (gdb)
+ 0x08048645 in my_handler ()
+ 1: x/i $pc
+ => 0x8048645 <my_handler+24>: ret
+ (gdb)
+ 0x010708b2 in trampoline () from /lib/i386-gnu/libc.so.0.3
+ 1: x/i $pc
+ => 0x10708b2 <trampoline+2>: add $0xc,%esp
+ (gdb)
+ 0x010708b5 in trampoline () from /lib/i386-gnu/libc.so.0.3
+ 1: x/i $pc
+ => 0x10708b5 <trampoline+5>: ret
+ (gdb)
+ __sigreturn (scp=0x102988c) at ../sysdeps/mach/hurd/i386/sigreturn.c:30
+ 30 ../sysdeps/mach/hurd/i386/sigreturn.c: No such file or directory.
+ 1: x/i $pc
+ => 0x1096340 <__sigreturn>: push %ebp
+ (gdb)
+ 0x01096341 30 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x1096341 <__sigreturn+1>: push %edi
+ (gdb)
+ 0x01096342 30 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x1096342 <__sigreturn+2>: push %esi
+ (gdb)
+ 0x01096343 30 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x1096343 <__sigreturn+3>: push %ebx
+ (gdb)
+ 0x01096344 30 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x1096344 <__sigreturn+4>: sub $0x2c,%esp
+ (gdb)
+ 0x01096347 30 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x1096347 <__sigreturn+7>: mov 0x40(%esp),%esi
+ (gdb)
+ 0x0109634b 30 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x109634b <__sigreturn+11>: call 0x11a0609 <__x86.get_pc_thunk.bx>
+ (gdb)
+ 0x011a0609 in __x86.get_pc_thunk.bx () from /lib/i386-gnu/libc.so.0.3
+ 1: x/i $pc
+ => 0x11a0609 <__x86.get_pc_thunk.bx>: mov (%esp),%ebx
+ (gdb)
+ 0x011a060c in __x86.get_pc_thunk.bx () from /lib/i386-gnu/libc.so.0.3
+ 1: x/i $pc
+ => 0x11a060c <__x86.get_pc_thunk.bx+3>: ret
+ (gdb)
+ 0x01096350 in __sigreturn (scp=0x102988c) at ../sysdeps/mach/hurd/i386/sigreturn.c:30
+ 30 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x1096350 <__sigreturn+16>: add $0x15ccb0,%ebx
+ (gdb)
+ 35 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x1096356 <__sigreturn+22>: test %esi,%esi
+ (gdb)
+ 0x01096358 35 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x1096358 <__sigreturn+24>: je 0x10964f0 <__sigreturn+432>
+ (gdb)
+ 0x0109635e 35 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x109635e <__sigreturn+30>: testl $0x10100,0x4(%esi)
+ (gdb)
+ 0x01096365 35 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x1096365 <__sigreturn+37>: jne 0x10964f0 <__sigreturn+432>
+ (gdb)
+ __hurd_threadvar_location_from_sp (__sp=<optimized out>, __index=<optimized out>) at ../hurd/hurd/threadvar.h:94
+ 94 ../hurd/hurd/threadvar.h: No such file or directory.
+ 1: x/i $pc
+ => 0x109636b <__sigreturn+43>: mov -0x38(%ebx),%ebp
+ (gdb)
+ __hurd_threadvar_location (__index=_HURD_THREADVAR_SIGSTATE) at ../hurd/hurd/threadvar.h:116
+ 116 in ../hurd/hurd/threadvar.h
+ 1: x/i $pc
+ => 0x1096371 <__sigreturn+49>: mov %esp,%edx
+ (gdb)
+ __hurd_threadvar_location_from_sp (__sp=0x1029848, __index=_HURD_THREADVAR_SIGSTATE) at ../hurd/hurd/threadvar.h:94
+ 94 in ../hurd/hurd/threadvar.h
+ 1: x/i $pc
+ => 0x1096373 <__sigreturn+51>: cmp 0x0(%ebp),%esp
+ (gdb)
+ 0x01096376 94 in ../hurd/hurd/threadvar.h
+ 1: x/i $pc
+ => 0x1096376 <__sigreturn+54>: jae 0x10964d0 <__sigreturn+400>
+ (gdb)
+ 0x0109637c 94 in ../hurd/hurd/threadvar.h
+ 1: x/i $pc
+ => 0x109637c <__sigreturn+60>: mov -0x15c(%ebx),%eax
+ (gdb)
+ 0x01096382 94 in ../hurd/hurd/threadvar.h
+ 1: x/i $pc
+ => 0x1096382 <__sigreturn+66>: and (%eax),%edx
+ (gdb)
+ 0x01096384 94 in ../hurd/hurd/threadvar.h
+ 1: x/i $pc
+ => 0x1096384 <__sigreturn+68>: mov -0x90(%ebx),%eax
+ (gdb)
+ 0x0109638a 94 in ../hurd/hurd/threadvar.h
+ 1: x/i $pc
+ => 0x109638a <__sigreturn+74>: add (%eax),%edx
+ (gdb)
+ _hurd_self_sigstate () at ../hurd/hurd/signal.h:165
+ 165 ../hurd/hurd/signal.h: No such file or directory.
+ 1: x/i $pc
+ => 0x109638c <__sigreturn+76>: mov 0x8(%edx),%edi
+ (gdb)
+ 0x0109638f 165 in ../hurd/hurd/signal.h
+ 1: x/i $pc
+ => 0x109638f <__sigreturn+79>: test %edi,%edi
+ (gdb)
+ 0x01096391 165 in ../hurd/hurd/signal.h
+ 1: x/i $pc
+ => 0x1096391 <__sigreturn+81>: je 0x1096598 <__sigreturn+600>
+ (gdb)
+ __sigreturn (scp=0x102988c) at ../sysdeps/mach/hurd/i386/sigreturn.c:42
+ 42 ../sysdeps/mach/hurd/i386/sigreturn.c: No such file or directory.
+ 1: x/i $pc
+ => 0x1096397 <__sigreturn+87>: mov %edi,(%esp)
+ (gdb)
+ 0x0109639a 42 in ../sysdeps/mach/hurd/i386/sigreturn.c
+ 1: x/i $pc
+ => 0x109639a <__sigreturn+90>: call 0x1051d70 <_hurd_sigstate_lock@plt>
+ (gdb)
+ 0x01051d70 in _hurd_sigstate_lock@plt () from /lib/i386-gnu/libc.so.0.3
+ 1: x/i $pc
+ => 0x1051d70 <_hurd_sigstate_lock@plt>: jmp *0x864(%ebx)
+ (gdb)
+ _hurd_sigstate_lock (ss=ss@entry=0x1244008) at hurdsig.c:170
+ 170 hurdsig.c: No such file or directory.
+ 1: x/i $pc
+ => 0x106bb90 <_hurd_sigstate_lock>: sub $0x1c,%esp
+ (gdb)
+ 0x0106bb93 170 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bb93 <_hurd_sigstate_lock+3>: mov %ebx,0x14(%esp)
+ (gdb)
+ 0x0106bb97 170 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bb97 <_hurd_sigstate_lock+7>: call 0x11a0609 <__x86.get_pc_thunk.bx>
+ (gdb)
+ 0x011a0609 in __x86.get_pc_thunk.bx () from /lib/i386-gnu/libc.so.0.3
+ 1: x/i $pc
+ => 0x11a0609 <__x86.get_pc_thunk.bx>: mov (%esp),%ebx
+ (gdb)
+ 0x011a060c in __x86.get_pc_thunk.bx () from /lib/i386-gnu/libc.so.0.3
+ 1: x/i $pc
+ => 0x11a060c <__x86.get_pc_thunk.bx+3>: ret
+ (gdb)
+ 0x0106bb9c in _hurd_sigstate_lock (ss=ss@entry=0x1244008) at hurdsig.c:170
+ 170 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bb9c <_hurd_sigstate_lock+12>: add $0x187464,%ebx
+ (gdb)
+ 0x0106bba2 170 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bba2 <_hurd_sigstate_lock+18>: mov %esi,0x18(%esp)
+ (gdb)
+ 170 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bba6 <_hurd_sigstate_lock+22>: mov 0x20(%esp),%esi
+ (gdb)
+ sigstate_is_global_rcv (ss=0x1244008) at hurdsig.c:162
+ 162 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bbaa <_hurd_sigstate_lock+26>: lea 0x57c0(%ebx),%eax
+ (gdb)
+ 0x0106bbb0 162 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bbb0 <_hurd_sigstate_lock+32>: mov (%eax),%eax
+ (gdb)
+ 163 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bbb2 <_hurd_sigstate_lock+34>: test %eax,%eax
+ (gdb)
+ 0x0106bbb4 163 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bbb4 <_hurd_sigstate_lock+36>: je 0x106bbbc <_hurd_sigstate_lock+44>
+ (gdb)
+ 0x0106bbb6 163 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bbb6 <_hurd_sigstate_lock+38>: cmpl $0x1,0x18(%esi)
+ (gdb)
+ 0x0106bbba 163 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bbba <_hurd_sigstate_lock+42>: je 0x106bbe0 <_hurd_sigstate_lock+80>
+ (gdb)
+ _hurd_sigstate_lock (ss=ss@entry=0x1244008) at hurdsig.c:172
+ 172 in hurdsig.c
+ 1: x/i $pc
+ => 0x106bbe0 <_hurd_sigstate_lock+80>: lea 0x4(%eax),%ecx
+ (gdb)
+ __spin_try_lock (__lock=0x124480c) at ../sysdeps/mach/i386/machine-lock.h:59
+ 59 ../sysdeps/mach/i386/machine-lock.h: No such file or directory.
+ 1: x/i $pc
+ => 0x106bbe3 <_hurd_sigstate_lock+83>: mov $0x1,%edx
+ (gdb)
+ 0x0106bbe8 59 in ../sysdeps/mach/i386/machine-lock.h
+ 1: x/i $pc
+ => 0x106bbe8 <_hurd_sigstate_lock+88>: xchg %edx,0x4(%eax)
+ (gdb)
+ <braunr> zyg: i don't get what you mean
+ <braunr> are you starting it with gdb ?
+ <zyg> braunr: yes: "gdb --args a.out"
+ <braunr> ok
+ <braunr> can't reproduce it
+ <braunr> i get "Program received signal SIGHUP, Hangup.
+ <braunr> "
+ <braunr> then continue, then the program has exited
+ <zyg> braunr: do you run it in gdb or without?
+ <zyg> braunr: Ah "Program received signal SIGHUP, Hangup." is from
+ gdb.. try issue continue, not sure why gdb stops at SIGHUP (default?).
+ <braunr> 10:34 < braunr> then continue, then the program has exited
+ <braunr> gdb stops at signals
+ <zyg> braunr: yes, try repeating that, but instead of continue, just issue
+ "si"
+ <zyg> braunr: sorry.. you would need to remove that printf/fprintf, else it
+ gets too long. That's why I put a breakpoint.
+ <braunr> a breakpoint ?
+ <braunr> on the signal handler ?
+ <zyg> braunr: yes, put a break after having entered the handler. Or edit
+ the pasted C code an remove that printf("got SIGHUP\n");
+ <braunr> i'm not sure that's correctly supported
+ <braunr> and i can see why glibc would deadlock on the sigstate lock
+ <braunr> don't do that :p
+ <zyg> braunr: why does it deadlock?
+ <braunr> because both the signal handler and messages from gdb will cause
+ common code paths to be taken
+ <zyg> braunr: oh.. when I step instruction I'm inside an SIGTRAP handler
+ probably?
+ <braunr> possible
+ <braunr> i don't know the details but that's the kind of things i expect
+ <braunr> and signals on the hurd are definitely buggy
+ <braunr> i don't know if we support nesting them
+ <braunr> i'd say we don't
+ <zyg> braunr: I'll try to put a break beyond that xchg and continue
+ <braunr> xhcg is probably the sigstate spinlock
+ <braunr> xchg*
+ <braunr> you'd need to reach the unlock instruction, which is probably
+ executed once the handler has finished running
+ <zyg> braunr: yes :) ... one instruction beyond didn't help
+ <zyg> braunr: thanks alot, putting a break in __sigreturn, after that
+ function has called _hurd_sigstate_unlock@plt works!
+ <braunr> works ?
+ <braunr> what did you want to do ?
+ <zyg> braunr: I want to trace user code inside the signal handler, also how
+ we enter and how we leave.
+ <braunr> well you can't do that inside, so no it doesn't work for you :/
+ <braunr> but that's a start i guess
+ <zyg> braunr: I seem to do most normal things inside the handler,
+ step-instruction and put breaks.
+ <braunr> ?
+ <braunr> i thought that's what made the program deadlock
+ <zyg> braunr: as you said earlier, the deadlock came when i "step
+ instruction" into the area between _hurd_sigstate_lock and
+ _hurd_sigstate_unlock. Otherwise I havn't had any issues.
+ <braunr> but isn't the sigstate locked during the handler execution ?
+ <zyg> braunr: no it locks and unlocks in __sigreturn which is done when
+ leaving the handler.
+ <braunr> than how could it deadlock on handler entry ?
+ <braunr> or perhaps the fact your handler was empty made the entry point
+ directly reach __sigreturn
+ <braunr> hm no i don't buy it
+ <braunr> the sigstate must also be locked on entry
+ <zyg> braunr: there was never any problem with entering
+ <braunr> then describe the problem with more details please
+ <braunr> ah sorry
+ <zyg> braunr: are you sure? there is minimal user-code run before the
+ signal is going into the handler.
+ <braunr> you "step out of the handler"
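+
+As an aside, the pasted test's handler uses a nonstandard signature
+(`void *my_handler(int, void *, void *)`); the conventional `SA_SIGINFO`
+form is `void handler(int, siginfo_t *, void *)`. A minimal sketch of the
+standard spelling (not the code discussed above, and unrelated to the
+stepping deadlock, which happens in `__sigreturn`'s locking):
+
+    #include <signal.h>
+    #include <string.h>
+    #include <unistd.h>
+
+    /* POSIX SA_SIGINFO handler: returns void, takes a siginfo_t pointer.  */
+    static void
+    my_handler (int signo, siginfo_t *info, void *context)
+    {
+      /* Only async-signal-safe calls belong here; write(2) is one.  */
+      const char msg[] = "got SIGHUP\n";
+      write (STDOUT_FILENO, msg, sizeof msg - 1);
+      (void) signo; (void) info; (void) context;
+    }
+
+    int
+    main (void)
+    {
+      struct sigaction sa;
+
+      memset (&sa, 0, sizeof sa);
+      sa.sa_sigaction = my_handler;
+      sa.sa_flags = SA_SIGINFO;
+      sigemptyset (&sa.sa_mask);
+      sigaction (SIGHUP, &sa, NULL);
+      raise (SIGHUP);
+      return 0;
+    }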
diff --git a/open_issues/glibc.mdwn b/open_issues/glibc.mdwn
index d45f0d14..b453b44f 100644
--- a/open_issues/glibc.mdwn
+++ b/open_issues/glibc.mdwn
@@ -281,14 +281,55 @@ Last reviewed up to the [[Git mirror's 0323d08657f111267efa47bd448fbf6cd76befe8
[[glibc_madvise_vs_static_linking]].
+ IRC, OFTC, #debian-hurd, 2013-09-09:
+
+ <gg0> does hurd MADV_DONTNEED or MADV_FREE or none?
+ http://sources.debian.net/src/jemalloc/3.4.0-1/include/jemalloc/jemalloc_defs.h.in#L239
+ <gg0> seems it builds by defining JEMALLOC_PURGE_MADVISE_DONTNEED
+ but i don't know what i'm talking about, so it could build with
+ JEMALLOC_PURGE_MADVISE_FREE as well
+
+ IRC, OFTC, #debian-hurd, 2013-09-10:
+
+ <youpi> gg0: it implements none, even if it defines DONTNEED (but
+ not FREE)
+
+ See also:
+
+ gnash (0.8.11~git20130903-1) unstable; urgency=low
+
+ * Git snapshot.
+ + Embedded jemalloc copy has been replaced by system one.
+ [...]
+ - Disable jemalloc on hurd and kfreebsd-*. No longer disabled upstream.
+
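+    In jemalloc terms, the purge in question amounts to the hint below (a
+    sketch only; as noted above, the Hurd defines `MADV_DONTNEED` but does
+    not currently act on it):
+
+        #include <sys/mman.h>
+
+        /* Hint that a page-aligned range of a mapping may be reclaimed.
+           This is what JEMALLOC_PURGE_MADVISE_DONTNEED boils down to;
+           whether the kernel actually frees anything is up to it.  */
+        static void
+        purge_hint (void *addr, size_t length)
+        {
+        #ifdef MADV_DONTNEED
+          (void) madvise (addr, length, MADV_DONTNEED);
+        #else
+          (void) addr;
+          (void) length;
+        #endif
+        }
+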
* `msync`
Then define `_POSIX_MAPPED_FILES`, `_POSIX_SYNCHRONIZED_IO`.
- * `sys/epoll.h`
+ * `epoll`, `sys/epoll.h`
Used by [[wayland]], for example.
+ IRC, freenode, #hurd, 2013-08-08:
+
+ <nalaginrut> is there any possible to have kquque/epoll alike
+ things in hurd? or there is one?
+ <braunr> nalaginrut: use select/poll
+ <nalaginrut> is it possible to implement epoll?
+ <braunr> it is
+ <braunr> we don't care enough about it to do it
+ <braunr> (for now)
+ <nalaginrut> well, since I wrote a server with Guile, and it could
+ take advantage of epoll, never mind, if there's no, it'll use
+ select automatically
+ <nalaginrut> but if someday someone care about it, I'll be
+ interested on it
+ <braunr> epoll is a scalability improvement over poll
+ <braunr> the hurd being full of scalability issues, this one is
+ clearly not a priority
+ <nalaginrut> ok
+
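+    Until someone does implement it, the portable fallback suggested above
+    looks roughly like this (a sketch using plain `poll`, which the Hurd
+    does provide; it is O(n) per wakeup, which is exactly what `epoll`
+    improves on):
+
+        #include <poll.h>
+        #include <stdio.h>
+
+        /* Return the first readable descriptor among fds, or -1.  */
+        static int
+        wait_readable (const int *fds, nfds_t nfds)
+        {
+          struct pollfd pfds[16];
+          nfds_t i;
+
+          if (nfds > 16)
+            nfds = 16;
+          for (i = 0; i < nfds; i++)
+            {
+              pfds[i].fd = fds[i];
+              pfds[i].events = POLLIN;
+            }
+          if (poll (pfds, nfds, -1) < 0)
+            {
+              perror ("poll");
+              return -1;
+            }
+          for (i = 0; i < nfds; i++)
+            if (pfds[i].revents & POLLIN)
+              return pfds[i].fd;
+          return -1;
+        }
+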
* `sys/eventfd.h`
* `sys/inotify.h`
@@ -390,6 +431,429 @@ Last reviewed up to the [[Git mirror's 0323d08657f111267efa47bd448fbf6cd76befe8
libgc, libsigsegv, luatex, mono, nspr, pth, ruby1.8, texlive-bin, uim,
and more.
+ IRC, OFTC, #debian-hurd, 2013-09-08:
+
+ <pinotree> oh, and even ruby2.0 suffers because of fixed-stack
+ threads
+ <youpi> yes, we definitely need to finish fixing it
+ <youpi> my current work is in our glibc repo, youpi/tls-threadvar
+ <pinotree> | *** makecontext: a stack at 0xbc000 with size 0x40000
+ is not usable with threadvars
+ <pinotree> all 8 failing tests with that
+ <youpi> maybe we can hand-disable the use of contexts in ruby for
+ now?
+ <pinotree> gg0: ↑ :)
+ <gg0> after the pseudo-patch i RFCed, i don't deserve to say
+ anything else about that :)
+ <pinotree> i mean, feel free to investigate and "fix" ruby2.0 as
+ above :)
+ <gg0> eh maybe i'd just be able to hand-disable failing
+ thread-related _tests_ :)
+ <gg0> i'm still hoping some real developer picks and actually fixes
+ it, seems it's not enough interesting though
+ <azeem> 21:37 < youpi> yes, we definitely need to finish fixing it
+ <gg0> afaiu youpi is working on threadvars-tls migration, which
+ would mean fixing them all. i just meant fixing ruby, which would
+ mean having puppet btw
+ <youpi> gg0: "actually fixing" means fixing threadvars-tls
+ migration
+ <youpi> "just fixing" ruby can be done by simply disabling context
+ use in ruby
+
+ IRC, OFTC, #debian-hurd, 2013-09-10:
+
+ <gg0> this one fixes make test by disabling context and giving more
+ time to timing related tests http://paste.debian.net/plain/37977/
+ <gg0> make test-all is another story
+ <youpi> gg0: AIUI, the sleep part should get fixed by the next
+ glibc upload, which will include the getclk patch
+ <youpi> but the disabling context part could be good to submit to
+ the debian ruby package, mentioning that this is a workaround for
+ now
+ <gg0> unfortunately still not enough, test-all still fails
+ <youpi> does it make the package not build?
+ <gg0> test-all is the second part of what we call tests
+ <gg0> they build and package (they produce all ruby packages),
+ after that they run debian/run-test-suites.bash which is make
+ test + make test-all
+ <gg0> well after or during the build doesn't matter, it's their
+ testsuite
+ <gg0> ok just failed:
+ <gg0> TestBug4409#test_bug4409 = Illegal instruction
+ <gg0> make: *** [yes-test-all] Error 132
+ <gg0> what to do with Illegal instruction?
+ <gg0> just found 2 words that make everybody shut up :p
+ <pinotree> same as above: debug it
+ <teythoon> gg0: have you confirmed that this is reproducible? I've
+ once had a process die with SIGILL and it was not and I figured
+ it might have been a (qemu?) glitch
+ <gg0> seems i'm running tests which are disabled on _all_ archs,
+ better so
+ <gg0> well, this should be reproducible. i just got it on a qemu, i
+ could try to reproduce it on real hardware but as just said, i
+ was testing tests disabled by maintainer so completely useless
+ <teythoon> gg0: yeah, I'm running all my hurd instances on qemu/kvm
+ as well, I meant did you get this twice in a row?
+ <gg0> to be honest i got another illegal instruction months ago but
+ don't recall doing what
+ <gg0> nope not twice, i've commented it out. then run the remaining
+ and then found out i should not have done what i was doing
+ <gg0> but i could try to reproduce it
+ <gg0> ok now i recall i got it another one few hours ago on real
+ hardware, from logs:
+ <gg0> TestIO#test_copy_stream_socket = Illegal instruction
+ <gg0> teythoon: on real hardware though
+ <gg0> and this is the one i should debug once it finishes, still
+ running
+
+ IRC, freenode, #hurd, 2013-09-11:
+
+ <gg0_> ../sysdeps/mach/hurd/jmp-unwind.c:53: _longjmp_unwind:
+ Assertion `! __spin_lock_locked (&ss->critical_section_lock)'
+ failed.
+ <gg0_> and
+ <gg0_> ../libpthread/sysdeps/mach/pt-thread-halt.c:51:
+ __pthread_thread_halt: Unexpected error: (ipc/send) invalid
+ destination port.
+ <tschwinge> gg0_: Which libpthread source are these? Stock Debian
+ package?
+ <gg0_> tschwinge: everything debian, ruby rebuilt with
+ http://paste.debian.net/plain/38519/ which should disable
+ *context
+
+ IRC, OFTC, #debian-hurd, 2013-09-11:
+
+ <gg0_> wrt ruby, i'd propose a patch that disables *context and
+ comments out failed tests (a dozen). most of them are timing
+ related, don't always fail
+ <gg0_> if they failed gracefully, we could leave them enabled and
+ just ignoring testsuite result, but most of them block testsuite
+ run when fail
+ <gg0_> anyone against? any better idea (and intention to implement
+ it? :p)?
+ <gg0_> youpi: is disabling some tests acceptable? ^
+ <youpi> it'd be good to at least know what is failing
+ <youpi> so as to know what impact hiding these failures will have
+ <youpi> remember that hiding bugs usually means getting bitten by
+ them even harder later :)
+ <gg0_> many of them use pipes
+ <gg0_> here the final list, see commented out ones
+ http://paste.debian.net/plain/38426
+ <gg0_> and as said some don't always fails
+ <gg0_> test_copy_stream_socket uses a socket
+ <youpi> note that we can still at least build packages with notest
+ <youpi> at least to get the binaries uploaded
+ <youpi> disabling *context should however really be done
+ <youpi> and the pipe issues are concerning
+ <youpi> I don't remember other pipe issues
+ <youpi> so maybe it's a but in the ruby bindings
+ <gg0_> i just remember they didn't die, then something unknown
+ fixed it
+ <youpi> I see something frightening in io.c
+ <youpi> #if BSD_STDIO
+ <youpi> preserving_errno(fseeko(f, lseek(fileno(f),
+ (off_t)0, SEEK_CUR), SEEK_SET));
+ <youpi> #endif
+ <youpi> this looks very much like a workaround for an odd thing in
+ BSD
+ <youpi> it happens that that gets enabled on hurd too, since
+ __MACH__ is defined
+ <youpi> you could try to drop these three lines, just to see
+ <youpi> this is very probably very worth investigating, at any rate
+ <youpi> even just test_gets_limit_extra_arg is a very simple test,
+ that I fail to see why it should ever fail on hurd-i386
+ <youpi> starting debugging it would be a matter of putting printfs
+ in io.c, to check what gets called, with what parameters, etc.
+ <youpi> just a matter of taking the time to do it, it's not very
+ complex
+ <gg0_> youpi: are you looking at 1.8? no BSD_STDIO here
+ <youpi> yes, 1.8
+ <gg0_> 1.9.3.448
+ <gg0_> landed to sid few days ago
+ <youpi> ah, I have 1.87
+ <youpi> +.
+ <gg0_> my favourites are TestIO#test_copy_stream_socket and
+ TestIO#test_cross_thread_close_fd -> Illegal instruction
+ <gg0_> TestIO#test_io_select_with_many_files sometimes Illegal
+ instruction, sometimes ruby1.9.1:
+ ../sysdeps/mach/hurd/jmp-unwind.c:53: _longjmp_unwind: Assertion
+ `! __spin_lock_locked (&ss->critical_section_lock)' failed.
+
+ [[thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock]]?
+
+ <gg0_> trying to debug illegal instruction
+ http://paste.debian.net/plain/38585/
+ <gg0_> (yes, i'm not even good at gdbing)
+ <gg0_> any hint?
+ <gg0_> oh found out there's an intree .gdbinit, that might
+ complicate things
+
+ IRC, OFTC, #debian-hurd, 2013-09-13:
+
+ <gg0_> where should it be implemented MAP_STACK? plus, is it worth
+ doing it considering migration to tls, wouldn't it be useless?
+ <gg0_> sysdeps/mach/hurd/mmap.c i should reduce stupid questions
+ frequency from daily to weekly basis
+
+ IRC, OFTC, #debian-hurd, 2013-09-14:
+
+ <gg0_> say i managed to mmap 0x200000-aligned memory
+ <gg0_> now i get almost the same failed tests i get disabling
+ *context
+ <gg0_> that would mean they don't depend on threading
+
+ IRC, freenode, #hurd, 2013-09-16:
+
+ <gg0> i get many ../sysdeps/mach/hurd/jmp-unwind.c:53:
+ _longjmp_unwind: Assertion `! __spin_lock_locked
+ (&ss->critical_section_lock)' failed.
+ <gg0> by running ruby testsuite, especially during test_read* tests
+ http://sources.debian.net/src/ruby1.9.1/1.9.3.448-1/test/ruby/test_io.rb#L972
+ <gg0> read/write operations with pipes
+ <braunr> gg0: that's weird
+ <braunr> gg0: debian glibc ?
+ <gg0> braunr: yep, debian 2.17-92
+ <gg0> sometimes assertion above, sometimes tests in question get
+ stuck reading
+ <gg0> it would be nice reproducing it w/o ruby
+ <gg0> probably massive io on pipes could do the job
+ <gg0> also more nice finding someone who finds it interesting to
+ fix :p
+ <gg0> ruby is rebuilt with http://paste.debian.net/plain/40755/, no
+ *context
+ <gg0> pipe function in tests above creates one thread for write,
+ one for read
+ http://sources.debian.net/src/ruby1.9.1/1.9.3.448-1/test/ruby/test_io.rb#L26
+ <tschwinge> gg0: About the jmp-unwind assertion failure: is it be
+ chance this issue:
+ <http://www.gnu.org/software/hurd/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.html>?
+ I didn't look in detail.
+ <braunr> tschwinge: that's what i thought too about the assertion,
+ which is why i find it strange
+ <gg0> asserting it's not locked then locking it doesn't exclude
+ race conditions
+
+ IRC, OFTC, #debian-hurd, 2013-09-17:
+
+ <gg0> youpi: i guess no one saw it anymore since
+ tg-thread-cancel.diff patch
+ <gg0> it =
+ http://www.gnu.org/software/hurd/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.html
+ <gg0> this one comes from sysdeps/mach/hurd/jmp-unwind.c:53 though
+ <gg0> another assertion to remove?
+ <youpi> gg0: it's not exactly the same: in hurd_thread_cancel we
+ hold no lock at all at the assertion point
+ <youpi> in jmp-unwind.c, we do hold a lock
+ <youpi> and the assertion might be actually true because all other
+ threads are supposed to hold the first lock before taking the
+ other one
+ <youpi> you could check for that in other places
+ <youpi> and maybe it's the other place which wouldhave to be fixed
+ <youpi> also look for documentation which would say that
+
+ IRC, freenode, #hurd, 2013-09-17:
+
+ <braunr_> gg0: is that what we do ??
+ <gg0> braunr: well, i was looking at
+ http://sources.debian.net/src/eglibc/2.17-92/debian/patches/hurd-i386/tg-thread-cancel.diff
+ <gg0> which afaics fixes
+ http://www.gnu.org/software/hurd/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.html
+ <gg0> the one i get now is
+ http://sources.debian.net/src/eglibc/2.17-92/sysdeps/mach/hurd/jmp-unwind.c#L53
+ <gg0> 09:12 < youpi> gg0: it's not exactly the same: in
+ hurd_thread_cancel we hold no lock at all at the assertion point
+ <gg0> 09:13 < youpi> in jmp-unwind.c, we do hold a lock
+ <gg0> 09:13 < youpi> and the assertion might be actually true
+ because all other threads are supposed to hold the first lock
+ before taking the other one
+ <braunr> gg0: that assertion is normal
+ <braunr> it says there is a deadlock
+ <braunr> ss->critical_section_lock must be taken before ss->lock
+ <gg0> you mean ss->lock before ss->critical_section_lock
+ <braunr> no
+ <gg0> ah ok got it
+ <braunr> that's a bug
+ <braunr> longjmp
+ <braunr> ugh
+ <braunr> you could make a pass through the various uses of those
+ locks and check what the intended locking protocol should be
+ <braunr> i inferred ss->critical_section_lock before ss->lock from
+ hurd_thread_cancel
+ <braunr> this might be wrong too but considering this function is
+ used a lot, i doubt it
+ <gg0> (no, i hadn't got it, i was looking at jmp-unwind.c where
+ lock is before critical_section_lock)
+ <gg0> could we get useful info from gdb'ing the assertion?
+ <tschwinge> gg0: Only if you first get an understanding why it is
+ happening, what you expect to happen instead/why it shall not
+ happen/etc. Then you can perhaps use GDB to verify that.
+ <gg0> i can offer an irc interface if anyone is interested, it's
+ ready, just to attach :)
+ <gg0> this is the test
+ http://sources.debian.net/src/ruby1.9.1/1.9.3.448-1/test/ruby/test_io.rb#L937
+ <gg0> pipe function creates two threads
+ http://sources.debian.net/src/ruby1.9.1/1.9.3.448-1/test/ruby/test_io.rb#L26
+ <gg0> Attaching to pid 15552
+ <gg0> [New Thread 15552.1]
+ <gg0> [New Thread 15552.2]
+ <gg0> (gdb)
+
+ IRC, freenode, #hurd, 2013-09-21:
+
+ <youpi> gg0: it seems the assert (! __spin_lock_locked
+ (&ss->critical_section_lock)); is bogus
+ <youpi> but it'd be good to catch a call trace
+ <youpi> well, it may not be bogus, in case that lock is only ever
+ taken by the thread itself
+ <youpi> in that case, inside longjmp_unwind we're not supposed to
+ have it already
+ <youpi> ok, that's what we had tried to discuss with Roland
+ <youpi> it can happen when playing with thread cancelation
+ <braunr> youpi: the assertion isn't exactly bogus
+ <braunr> the lock ordering is
+ <youpi> braunr: which one are you talking about?
+ <youpi> the one in hurd_thread_cancel looks really wrong
+ <youpi> and some parts of the code keep the critical section lock
+ without ss->lock held, so I don't see how lock ordering can help
+
+ IRC, OFTC, #debian-hurd, 2013-09-22:
+
+ <gg0> how much does this patch suck on a scale from 1 to 10?
+ http://paste.debian.net/plain/44810/
+ <youpi> well, the stack allocation issue will go away once I get
+ the threadvars away
+ <youpi> I'm working on it right now
+ <youpi> about the lib paths, it makes sense to add the gnu case,
+ but i386-gnu shouldn't be put in the path
+ <gg0> that's great
+ <gg0> so seems the wrong moment for what i've already done
+ ie. asking terceiro what he thinks about patch above :/
+ <gg0> any distro-independent way to get libc.so and libm.so path?
+ <gg0> ruby as last resource takes them from "ldd ruby"
+ <pinotree> gg0: should work fine then
+ <gg0> well it does. but gnu doesn't have a case so it hits default
+ which is broken
+ http://bugs.ruby-lang.org/projects/ruby-trunk/repository/revisions/40235/entry/test/dl/test_base.rb
+ <gg0> btw even linux and kfreebsd with debian multipath have broken
+ cases but they don't hit default and get fixed by ldd later
+ <pinotree> why it is broken? are arguments passed to that script?
+ <gg0> i'm not sure about what propose. a broken case so it doesn't
+ hit default like linux and kfbsd
+ <gg0> yes they are :/
+ <pinotree> and which ones are? who executes that script and which
+ arguments does it pass to it?
+ <gg0> other ruby scripts which have nothing to do with libc/libm
+ <pinotree> well, if they pass arguments which should be the paths
+ to libc and libm, they must be getting such paths, aren't they?
+ <gg0> they don't. arguments are other ruby scripts, don't know why,
+ maybe something else broken before
+ <gg0> but that would mean that before there's a smarter path
+ detection way, i doubt
+ <pinotree> then add the case for hurd, but setting both libc and
+ libm as nil
+ <pinotree> so they will be fetched again
+ <gg0> yep and would really ugly
+ <gg0> +be
+ <gg0> "please commit this one which wrongly sets paths."
+ <gg0> an alternative would be removing default case
+ <gg0> or pointing it out by proposing ldd in hurd case might make
+ them review the whole detection
+ <gg0> by setting correct paths like in patch above it wouldn't
+ break a possible hurd-amd64, it would work due to ldd
+ <youpi> gg0: that's why I said the patch is fine, but without the
+ i386-gnu part of the path
+ <youpi> just like it happens to be on linux & kfreebsd
+ <gg0> i might take ldconfig -p output
+ <gg0> to make it uselessly correct from start
+ <gg0> http://bugs.ruby-lang.org/issues/8937
+ <pinotree> note thar ruby 1.8 is EOL
+ <pinotree> *that
+ <gg0> -- If you're reporting a bug in both Ruby 1.9/2.0 and Ruby
+ 1.8: ruby-trunk, and write like "this bug can be reproduced in
+ Ruby 1.8 as well." --
+ <gg0> i suspect this one won't be the only one i'll file. unless
+ upcoming youpi's tls and braunr's thread destruction patches fix
+ all ruby tests
+ <pinotree> did you check ruby2.0 too, btw?
+ <gg0> switched to ruby2 few hours ago. i pointed out 2nd part of
+ testsuite is not enabled, probably terceiro will enable it soon
+ <gg0> by applying my patch above we'd completely fix current
+ ruby2.0 build (yes because tests are not completely enabled)
+ <pinotree> what you run those extra tests?
+ <gg0>
+ http://anonscm.debian.org/gitweb/?p=collab-maint/ruby1.9.1.git;a=blob;f=debian/run-test-suites.bash
+ <gg0> make test + make test-all
+ <gg0> (test-all is 2nd part)
+ <gg0> many are problematic. i didn't finish yet to suppress them
+ one-by-one. one i suppress, another one pops up
+ <gg0> either get stuck or well known assertion
+ <pinotree> check those that get stuck :)
+ <gg0> which kind of check?
+ <pinotree> "check" as in "debug"
+ <gg0> btw i tested puppet few days ago (with ruby1.8), it seems to
+ be working, at least at trasferring files from master
+ <gg0> don't know about any advanced usage
+ <pinotree> ruby 1.8 is going to die soon, so testing things against
+ it is not totally useful
+ <gg0> so you assume 1.8 is less broken than 1.9/2.0, right?
+ <pinotree> no
+ <gg0> i just can see it's been built without tests itself too
+ <pinotree> erm no
+ <gg0> well ok, if i can be wrong, i'll be wrong
+ <gg0> i say that after a quick check time ago, might be wrong
+ <pinotree> `getbuildlogs ruby1.8 last hurd-i386`, see the last
+ build log
+ <gg0> ah from pkg-kde-tools
+ <gg0> i hate kde :)
+ <pinotree> no?
+ <gg0> no what?
+ <pinotree> devscripts: /usr/bin/getbuildlog
+ <gg0> pkg-kde-tools: /usr/bin/pkgkde-getbuildlogs
+ <pinotree> which is not what i said
+ <gg0> wait that's what apt-file found
+ <gg0> maybe i should update it
+ <gg0> is it so recent?
+ <pinotree> no
+ <pinotree> i just added an 's' more at the end of the command, but
+ typing getbu<tab> could have been helpful anyway...
+ <gg0> yeah just got it
+ <gg0> my fault not to have tried to run it before looking for it
+ <pinotree> and btw, i don't see what hating kde has to do with
+ tools developed by qt/kde debian packagers
+ <gg0> j/k i simply don't use kde, never used and apt-file search
+ told me it was from pkg-kde-tools
+ <gg0> btw build log says "make test" fails, doesn't even start. and
+ its failure doesn't block the build
+ <pinotree> exactly
+ <gg0> s/make test/make test-all/
+ <gg0> "make test" (aka "1st part" above) doesn't run. i guess it's
+ missing in packaging
+
+ IRC, freenode, #hurd, 2013-09-22:
+
+ <braunr> youpi: i mean the lock order where the assertion occurs is
+ reserved compared to the one in hurd_thread_cancel
+ <braunr> (and the one in hurd_thread_cancel is the same used in
+ hurd condition-related functions)
+ <youpi> "reserved" ?
+ <braunr> reversed
+ <braunr> :)
+ <youpi> by "the assertion occurs", you mean gg0's spot?
+ <braunr> yes
+ <youpi> well , the assertion also happens in hurd_thread_cancel
+ <braunr> it does oO
+ <braunr> i didn't see that
+ <youpi> but otherwise yes, it's completely bogus that we have both
+ locking in different orders
+ <youpi> could you submit the fix for jmp-unwind.c to upstream?
+ <braunr> what fix ?
+ <youpi> reversing the lock order
+ <braunr> ah, simply that
+ <youpi> (well, provided that hurd_thread_cancel is right)
+ <braunr> that's what i suggested to gg0
+ <braunr> to check where those locks are held and determine the
+ right order
+
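+
+    The ordering point can be illustrated generically (a sketch with
+    stand-in locks, not the actual glibc code): as long as every path takes
+    the two locks in the same order, no deadlock is possible; one path
+    taking them in the reverse order can deadlock against all the others.
+
+        #include <pthread.h>
+
+        /* Stand-ins for ss->critical_section_lock and ss->lock.  */
+        static pthread_mutex_t critical_section_lock =
+          PTHREAD_MUTEX_INITIALIZER;
+        static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
+        /* Agreed-upon order: critical_section_lock first, then lock.  */
+        void
+        good_path (void)
+        {
+          pthread_mutex_lock (&critical_section_lock);
+          pthread_mutex_lock (&lock);
+          /* ... */
+          pthread_mutex_unlock (&lock);
+          pthread_mutex_unlock (&critical_section_lock);
+        }
+
+        /* Reversed order: run concurrently with good_path, both threads
+           can end up each holding one lock and waiting for the other.  */
+        void
+        bad_path (void)
+        {
+          pthread_mutex_lock (&lock);
+          pthread_mutex_lock (&critical_section_lock);
+          /* ... */
+          pthread_mutex_unlock (&critical_section_lock);
+          pthread_mutex_unlock (&lock);
+        }
+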
* `recvmmsg`/`sendmmsg` (`t/sendmmsg`)
From [[!message-id "20120625233206.C000A2C06F@topped-with-meat.com"]],
@@ -401,13 +865,140 @@ Last reviewed up to the [[Git mirror's 0323d08657f111267efa47bd448fbf6cd76befe8
Then perhaps the Linux fallback case should be that instead of stubs,
too.*
+ * `SOCK_CLOEXEC`
+
+ IRC, freenode, #hurd, 2013-09-02:
+
+ <gnu_srs1> Do we support accept4 with the SOCK_CLOEXEC flag?
+ <gnu_srs1> According to the code in sysdeps/mach/hurd/accept4.c
+ that case is not covered
+ <gnu_srs1> (only O_NONBLOCK, not SOCK_NONBLOCK??))
+ <pinotree> gnu_srs1: we do
+ <pinotree> but only for accept4, not for socket and socketpair
+ <gnu_srs1> pinotree: cannot find the case for O_CLOEXEC covered in
+ __libc_accept4()
+ <pinotree> gnu_srs1: no, you need SOCK_*
+ <gnu_srs1> The only code for accept4() is in sysdeps/mach/hurd/ and
+ it uses O_* for flags ?
+ <pinotree> flags = sock_to_o_flags (flags);
+ <pinotree> tried checking it?
+ <gnu_srs1> Aha, tks:-D
+ <pinotree> and you don't need an explicit case of O_CLOEXEC, since
+ it is handled in other ways
+
+ [[!message-id "1378154151.21738.15.camel@G3620.my.own.domain"]].
+
+ IRC, freenode, #hurd, 2013-09-03:
+
+ <gnu_srs> any ideas about the SOCK_CLOEXEC issue?
+ <pinotree> didn't i tell already about it?
+ <gnu_srs> I did not find any hurd related code in tschwinges
+ branches.
+ <pinotree> you didn't check deep then...
+ <gnu_srs> so why does socket/socketpair not return ENOSYS then?
+ <pinotree> why should it, since they are implemented?
+ <braunr> ...
+ <gnu_srs> for socket/socketpair?
+ <braunr> gnu_srs: enosys means no system call
+ <gnu_srs> s/ENOSYS/EINVAL/
+ <gnu_srs> see the mail to the bug-hurd/debian-hurd ML for more info
+ <gnu_srs> and tschwinges reply
+ <pinotree> which is what i knew already?
+ <gnu_srs> pinotree: please reply on the mailing list on the EINVAL
+ vs EPROTOTYPE issue to clarify things
+ <pinotree> gnu_srs:
+ https://sourceware.org/ml/libc-alpha/2013-02/msg00092.html
+ <pinotree> gnu_srs: things were clear already...
+ <gnu_srs> pinotree: I've read that patch and still pflocal/pf.c
+ returns EPROTOTYPE not changed by the __socket wrapper in eglibc
+ <pinotree> gnu_srs: what about realizing SOCK_CLOEXEC and friends
+ are NOT posix?
+ <gnu_srs> since socket/socketpair does not return EINVAL the dbus
+ code has to be patched then?
+ <pinotree> pflocal should never ever get such flags mixed to the
+ protocol, so any invalid value of protocol correctly returns
+ EPROTOTYPE
+ <gnu_srs> this is the question I need answered: Which way to go?
+ <pinotree> all of them
+ <gnu_srs> ?
+ <pinotree> - applications should not assume that because you have
+ accept4 (which is not posix) then SOCK_CLOEXEC and SOCK_NONBLOCK
+ (flags for it) are usable to socket and socketpair
+ <pinotree> - glibc should (as the idea of my patch) handle
+ implementations providing SOCK_* but not supporting them for
+ socket/socketpair
+ <pinotree> - finally the hurd part of glibc could implement them
+ <gnu_srs> to conclude: should I send a bug report for dbus then?
+ <gnu_srs> pinotree: yes or no?
+ <pinotree> gnu_srs: *shrug* i wrote it above, so an *upstream*
+ report (not a debian one)
+
+ IRC, freenode, #hurd, 2013-09-06:
+
+ <gnu_srs> I've found another error code issue, now in glib2.0 (and
+ dbus). Are you really sure the error code
+ <gnu_srs> for protocol of pflocal/pf.c should be
+ EPROTONOSUPPORT. The code expects EINVAL for a protocol with
+ <gnu_srs> SOCK_CLOEXEC, which is a flag. Maybe pf.c should add
+ this case and return EINVAL instead of
+ <gnu_srs> submitting bug reports upstream. Yes, I know this is not
+ POSIX, but it is defined for Hurd too,
+ <gnu_srs> currently only supported for accept4, not socket or
+ socketpair.
+ <pinotree> gnu_srs: no, and i explained already why it is wrong
+ this way
+ <pinotree> pflocal shouldn't even get such flags
+ <pinotree> (pflocal or any other server implementing socket_create)
+ <gnu_srs> (20:19:35) pinotree: pflocal shouldn't even get such
+ flags
+ <gnu_srs> then the glibc wrapper code is missing to catch this
+ flag:(
+ <gnu_srs> youpi: ?
+ <pinotree> gnu_srs: because, as told many times, socket and
+ socketpair do not support such flags
+ <pinotree> given they don't do, they filter nothing
+ <pinotree> and no, you need to file bugs upstream, since having
+ SOCK_* and accept4 does not imply at all that socket and
+ socketpair support them
+
+ IRC, freenode, #hurd, 2013-09-07:
+
+ <gnu_srs> A correction from yesterdays discussion:
+ s/EPROTONOSUPPORT/EPROTOTYPE
+
+ IRC, freenode, #hurd, 2013-09-10:
+
+ <gnu_srs> for dbus2.0 I found out that the third SOCK_CLOEXEC case
+ needs a patch too (more working tests),
+ <gnu_srs> the updated patch is at http://paste.debian.net/37948/ if
+ you have the time, otherwise I'll do it.
+ <pinotree> gnu_srs: which is what i wrote in my bug report...
+ <gnu_srs> Yes you wrote that, but the patch is not updated yet?
+ <pinotree> it refers to a different socket access, recently added,
+ which is not used by default
+ <gnu_srs> I got two more tests running when adding that patch:-/
+ <pinotree> tests of what?
+ <gnu_srs> run-test.sh and run-test-systemserver.sh:P
+ <pinotree> tests of what?
+ <pinotree> i don't have the universal knowledge of the files in all
+ the sources
+ <gnu_srs> dbus-1.6.14/test/name-test/*
+
+ [[!message-id "523A3D6C.2030200@gmx.de"]].
+
+ IRC, OFTC, #debian-hurd, 2013-09-19:
+
+ <pinotree> tschwinge: ehm, regarding the SOCK_* patches for
+ socket/socketpair, didn't we talk about them when i worked on
+ eglibc 2.17?
+
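+    A minimal sketch (not taken from the discussion) of the fallback pattern
+    the applications above (dbus, glib) are being asked to implement: do not
+    assume `socket` accepts `SOCK_CLOEXEC` just because `accept4` exists.
+    The helper name is hypothetical, and checking for both `EINVAL` and
+    `EPROTOTYPE` is an assumption based on the error codes mentioned in the
+    log.
+
+        /* Hypothetical application-side helper, not dbus/glib code.  */
+        #include <errno.h>
+        #include <fcntl.h>
+        #include <sys/socket.h>
+
+        static int
+        socket_cloexec (int domain, int type, int protocol)
+        {
+          int fd = socket (domain, type | SOCK_CLOEXEC, protocol);
+          if (fd >= 0 || (errno != EINVAL && errno != EPROTOTYPE))
+            return fd;
+
+          /* Flag not understood: retry without it and set FD_CLOEXEC by
+             hand (racy across fork+exec, which is exactly what
+             SOCK_CLOEXEC exists to avoid).  */
+          fd = socket (domain, type, protocol);
+          if (fd >= 0)
+            fcntl (fd, F_SETFD, FD_CLOEXEC);
+          return fd;
+        }
+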
For specific packages:
* [[octave]]
* Create `t/cleanup_kernel-features.h`.
- * Add tests from Linux kernel commit messages for `t/dup3` et al.
+ * [[Secure_file_descriptor_handling]].
* In `sysdeps/unix/sysv/linux/Makefile`, there are a bunch of
`-DHAVE_SENDFILE` -- but we do have `sendfile`, too.
@@ -927,6 +1518,31 @@ Last reviewed up to the [[Git mirror's 0323d08657f111267efa47bd448fbf6cd76befe8
<tschwinge> Yeah, that's known for years... :-D
<tschwinge> Probably not too difficult to resolve, though.
+ * IRC, OFTC, #debian-hurd, 2013-08-16:
+
+ <pinotree> http://paste.debian.net/25934/ ← _hurd_thread_sigstate calls
+ malloc, boom
+
+ * `conformtest`
+
+ IRC, OFTC, #debian-hurd, 2013-09-22:
+
+ <youpi> btw, I noticed that glibc has a head conformance test which we
+ happily fail quite a bit :)
+ <youpi> it's not so awful, we don't have twice as many failures as
+ linux, but not so far
+ <pinotree> youpi: do you mean "header" for "head", right?
+ <youpi> err, where ? :)
+ <pinotree> <youpi> btw, I noticed that glibc has a head conformance
+ test which we happily fail quite a bit :)
+ <youpi> ah, yes
+ <pinotree> noticed that too
+ <youpi> I had a quick look at the POSIX part, some things are probably
+ not too hard to change (e.g. exposing pthread_kill in signal.h)
+ <youpi> others will by quite hard to fix (short type instead of int
+ type for some flock structure field)
+ <youpi> s/by/be/
+
* Verify baseline changes, if we need any follow-up changes:
* a11ec63713ea3903c482dc907a108be404191a02
@@ -1253,6 +1869,13 @@ TODO.
[[!message-id "20120723195143.7F8142C0B9@topped-with-meat.com"]].
+ IRC, freenode, #hurd, 2013-08-27:
+
+ < gnu_srs> Hi, is this fixed by now?
+ < gnu_srs> ../hurd/hurd.h:72:5: warning: case value ‘0’ not in
+ enumerated type ‘error_t’ [-Wswitch]
+ < pinotree> nope
+
* baseline
fd5bdc0924e0cfd1688b632068c1b26f3b0c88da..2ba92745c36eb3c3f3af0ce1b0aebd255c63a13b
(or probably Samuel's mmap backport) introduces:
@@ -1389,6 +2012,23 @@ Failures, mostly in order of appearance:
Due to `ext2fs --no-atime`.
+ * IRC, OFTC, #debian-hurd, 2013-05-08
+
+ <youpi> bah, tst-atime failure :)
+ <pinotree> do you have its output?
+ <youpi> well it's very simple
+ <youpi> I have the noatime option on / :)
+ <pinotree> oh
+ <youpi> fortunately fsysopts works :)
+ <pinotree> the test checks whether ST_NOATIME is in the mount
+ options, maybe it would be a good idea to provide it
+ <youpi> yes
+ <pinotree> unfortunately it isn't in posix, so i'm not sure whether
+ adding it to the general bits/statvfs.h would be welcome
+ <pinotree> or whether we should fork it, like it is done for linux
+ <pinotree> oh no, we fork it already
+ <pinotree> \o/
+
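+    A rough illustration (not the glibc test itself) of the check pinotree
+    describes: the test can only skip itself on a noatime mount if
+    `ST_NOATIME` is exposed through `statvfs`, which is what would need
+    adding to the Hurd's forked `bits/statvfs.h`.
+
+        #include <stdio.h>
+        #include <sys/statvfs.h>
+
+        int
+        main (void)
+        {
+          struct statvfs st;
+          if (statvfs (".", &st) != 0)
+            return 1;
+        #ifdef ST_NOATIME
+          if (st.f_flag & ST_NOATIME)
+            {
+              puts ("mounted noatime, atime checks should be skipped");
+              return 0;
+            }
+        #endif
+          puts ("atime updates are expected here");
+          return 0;
+        }
+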
`dirent/tst-fdopendir.out`:
directory atime changed
diff --git a/open_issues/glibc/0.4.mdwn b/open_issues/glibc/0.4.mdwn
index ceb5ea21..f864469d 100644
--- a/open_issues/glibc/0.4.mdwn
+++ b/open_issues/glibc/0.4.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -10,6 +10,8 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_glibc open_issue_libpthread]]
+Things to consider doing when bumping the glibc SONAME.
+
# IRC, freenode, #hurd, 2012-12-14
diff --git a/open_issues/glibc/debian.mdwn b/open_issues/glibc/debian.mdwn
index 331632f3..2ef2c474 100644
--- a/open_issues/glibc/debian.mdwn
+++ b/open_issues/glibc/debian.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -24,6 +24,43 @@ locale stuff.
`--disable-compatible-utmp`?
+## IRC, freenode, #hurd, 2013-08-28
+
+ <youpi> uh, the i686 profiles have much more progression than i386
+ <youpi> it seems they don't actually run these
+ <pinotree> youpi: what do you mean with "we don't run those"?
+ <pinotree> iirc there are three build profiles done, but there are 4
+ regression test files
+ <youpi> yes, but some failing tests are not run in the three build profiles
+ <youpi> even if they are built for all of them
+ <pinotree> not even run? which ones?
+ <youpi> see for instance test-ifloat.out
+ <youpi> test-ifloat is built in all profiles, but only run in the libc one
+ <pinotree> don't have a glibc built tree around atm, sorry :/
+ <youpi> perhaps because glibc thinks it's not useful to run it again if it
+ fails on i386
+ <youpi> you can check the logs
+ <pinotree> do you think glibc's build system is that smart? :)
+ <pinotree> all the builds are done in separate builddirs, so theorically
+ they should not touch each other...
+ <youpi> yes
+ <youpi> that's why I'm surprised
+ <pinotree> could it be they get not run in optimized/particular builds?
+ <pinotree> what about linux/kfreebsd i386?
+ <youpi> I don't see what makes them not run
+ <youpi> or at least be treated particularly by th eMakefile
+ <youpi> not run on kfreebsd either
+ <youpi> pinotree: also, most of the tests now working have been marked as
+ failing by your patches for 2.17, would it be possible to retry them on
+ the box you used at that time?
+ <pinotree> that's the vm on my machine
+ <youpi> which kind of vm?
+ <youpi> kvm?
+ <pinotree> y
+ <youpi> they are working here
+ <youpi> with kvm
+
+
# Building
Run `debian/rules patch` to apply patches (instead of having it done during the
@@ -62,3 +99,70 @@ apter applying patches.
If the Debian symbol versioning file is not up to date and the build of Debian
packages fails due to this, putting `DPKG_GENSYMBOLS_CHECK_LEVEL=0` in the
environment \`\`helps''; see `man dpkg-gensymbols`.
+
+
+# IRC, freenode, #hurd, 2013-07-01
+
+ <braunr> something seems to have changed with regard to patch handling in
+ eglibc 2.17
+ <braunr> pinotree: when i add a patch to series and use dpkg-buildpackage,
+ i'm told there are local modifications and the build stops :/
+ <braunr> any idea what i'm doing wrong ?
+ <pinotree> which steps do you do?
+ <braunr> i extract the sources, copy the patch to debian/patches/hurd-i386,
+ add the appropriate line to debian/patches/series, call dch -i, then
+ dpkg-buildpackage
+ <pinotree> eglibc is a "3.0 (quilt)" format source package
+ <pinotree> this means its default patches are in a quilt-style system, and
+ they are applied on extraction
+ <braunr> ok
+ <braunr> and it can't detect new patches ?
+ <pinotree> so if you add a new patch to the global serie, you have to push
+ it manually
+ <braunr> i have to revert them all ?
+ <braunr> ok
+ <braunr> how do i do that ?
+ <pinotree> quilt push -a
+ <braunr> ok
+ <braunr> thanks
+ <pinotree> remember to do that before starting the build, since the rest
+ assumes the quilt-style patches are fully applied
+ <bddebian> No push applies them, quilt pop -a reverts them
+ <pinotree> yeah, and he has to push the new over the dpkg-applied ones
+ <bddebian> Oh, aye
+ <braunr> does quilt change series ?
+ <pinotree> no
+ <braunr> ok
+ <pinotree> i mean, some commands do that
+ <braunr> so i do everything i did, with an additional push, right ?
+ <pinotree> ok, screw me, i didn't get your question above :P
+ <braunr> does that change your answer ?
+ <pinotree> <braunr> does quilt change series ?
+ <braunr> yes
+ <pinotree> if you import or create a new patch, it changes series indeed
+ <braunr> ok
+ <pinotree> push or pop of patches does not
+ <braunr> i'm doing it wron
+ <braunr> g
+ <pinotree> btw, in a quilt patch stack you can easily import a new patch
+ using the import command
+ <pinotree> so for example you could do
+ <pinotree> apt-get source eglibc # or get it somehow else
+ <pinotree> cd eglibc-*
+ <pinotree> quilt import /location/of/my/patch
+ <pinotree> quilt push # now your patch is applied
+ <braunr> ah thanks
+ <pinotree> dpkg-buildpackage as usual
+ <braunr> that's what i was looking for
+ <bddebian> quilt new adds a new entry in series
+ <pinotree> y
+ <bddebian> or import, aye
+ <pinotree> braunr: if you want to learn quilt, a very good doc is its own,
+ eg /usr/share/doc/quilt/quilt.txt.gz
+ * bddebian has never actually used import
+ <braunr> ok
+ <pinotree> it is basically a simple stack of patches
+
+ <youpi> braunr: yes, patch handling is a bit different
+ <youpi> the arch-independant patches are applied by dpkg-source -x
+ <youpi> and the arch-dependent patches are applied during build
diff --git a/open_issues/glibc/debian/experimental.mdwn b/open_issues/glibc/debian/experimental.mdwn
index 8d117e99..5168479d 100644
--- a/open_issues/glibc/debian/experimental.mdwn
+++ b/open_issues/glibc/debian/experimental.mdwn
@@ -11,6 +11,7 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_glibc]]
Issues with the current 2.17 version of glibc/EGLIBC in Debian experimental.
+Now in unstable.
# IRC, OFTC, #debian-hurd, 2013-03-14
@@ -113,3 +114,62 @@ Issues with the current 2.17 version of glibc/EGLIBC in Debian experimental.
eventually? (Some experimental package(s), but which?)
<youpi> that was libc0.3 packages
<youpi> which are indeed known to break the network
+
+
+# IRC, freenode, #hurd, 2013-06-18
+
+ <braunr> root@darnassus:~# dpkg-reconfigure locales
+ <braunr> Generating locales (this might take a
+ while)... en_US.UTF-8...Segmentation fault
+ <braunr> is it known ?
+ <youpi> uh, no
+
+
+## IRC, OFTC, #debian-hurd, 2013-06-19
+
+ <pinotree> btw i saw too the segmentation fault when generating locales
+
+
+# IRC, OFTC, #debian-hurd, 2013-06-20
+
+ <youpi> damn
+ <youpi> hang at ext2fs boot
+ <youpi> static linking issue, clearly
+
+
+## IRC, freenode, #hurd, 2013-06-30
+
+ <youpi> Mmm
+ <youpi> __access ("/etc/ld.so.nohwcap", F_OK) at startup of ext2fs
+ <youpi> deemed to fail....
+ <pinotree> when does that happen?
+ <youpi> at hwcap initialization
+ <youpi> at least that's were ext2fs.static linked against libc 2.17 hangs
+ at startup
+ <youpi> and this is indeed a very good culprit :)
+ <pinotree> ah, a debian patch
+ <youpi> does anybody know a quick way to know whether one is the / ext2fs ?
+ :)
+ <pinotree> isn't the root fs given a special port?
+ <youpi> I was thinking about something like this, yes
+ <youpi> ok, boots
+ <youpi> I'll build a 8~0 that includes the fix
+ <youpi> so people can easily build the hurd package
+ <youpi> Mmm, no, the bootstrap port is also NULL for normally-started
+ processes :/
+ <youpi> I don't understand why
+ <youpi> ah, only translators get a bootstrap port :/
+ <youpi> perhaps CRDIR then
+ <youpi> (which makes a lot of sense)
+
+
+## IRC, freenode, #hurd, 2013-07-01
+
+ <braunr> youpi: what is local-no-bootstrap-fs-access.diff supposed to fix ?
+ <youpi> ext2fs.static linked againt debian glibc 2.17
+ <youpi> well, as long as you don't build & use ext2fs.static with it...
+ <braunr> that's thing, i want to :)
+ <braunr> +the
+ <youpi> I'd warmly welcome a way to detect whether being the / translator
+ process btw
+ <youpi> it seems far from trivial
diff --git a/open_issues/glibc/t/tls-threadvar.mdwn b/open_issues/glibc/t/tls-threadvar.mdwn
index 105a07c7..7ce36f41 100644
--- a/open_issues/glibc/t/tls-threadvar.mdwn
+++ b/open_issues/glibc/t/tls-threadvar.mdwn
@@ -64,3 +64,55 @@ dropped altogether, and `__thread` directly be used in glibc.
<youpi> I saw the mails, but didn't investigate at all
[[!message-id "878vdyqht3.fsf@kepler.schwinge.homeip.net"]].
+
+
+# IRC, freenode, #hurd, 2013-07-08
+
+ <youpi> tschwinge: apparently there were a lot of changes missing in the
+ threadvars branch I had commited long time ago
+ <youpi> I'm gathering things
+ <tschwinge> youpi: t/tls-threadvar you mean?
+ <youpi> yes
+ <youpi> tschwinge: yes, there were a lot other occurences of threadvars
+ stuff in various places
+ <youpi> I'm building libc again, and will see what issue would remain
+
+
+## IRC, freenode, #hurd, 2013-07-12
+
+ <youpi> braunr: about the per-thread ports, there is also the mig reply
+ port
+ <youpi> (stored in _HURD_THREADVAR_MIG_REPLY)
+
+
+## IRC, freenode, #hurd, 2013-07-15
+
+ <braunr> and with the branch youpi pushed where he removes threadvars, it
+ shouldn't get "too" hard
+ <braunr> (save for the tricky bugs you may encounter)
+ <youpi> well, that branch is not working yet
+
+
+## IRC, OFTC, #debian-hurd, 2013-09-22
+
+ <youpi> I'm currently tracking bugs with my threadvars changes
+ <youpi> some of them seem fine, others, not
+ <youpi> of course the most complex ones are the most probable culprits for
+ the issues I'm getting
+ <youpi> fortunately they're after the process bootstrap
+ <youpi> so basically that works
+ <youpi> just a few dozen tests fail
+ <youpi> mostly about loading .so or raising signals
+ <youpi> dlopen("bug-dlsym1-lib1.so"): bug-dlsym1-lib1.so: cannot open
+ shared object file: Function not implemented
+ <youpi> after having changed errno a bit
+ <youpi> doesn't that look odd ? :)
+ <youpi> good, I found an issue with the sigstate
+ <youpi> now running testsuite again, to see whether there are other issues
+ with it :)
+ <youpi> s/sigstate/reply_port/ actually
+
+
+## IRC, OFTC, #debian-hurd, 2013-09-23
+
+ <youpi> yay, errno threadvar conversion success
diff --git a/open_issues/glibc/t/tls.mdwn b/open_issues/glibc/t/tls.mdwn
index 68db2cc1..a92a21fb 100644
--- a/open_issues/glibc/t/tls.mdwn
+++ b/open_issues/glibc/t/tls.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2011, 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2012, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -14,7 +15,8 @@ License|/fdl]]."]]"""]]
* Discuss d2431f633e6139a62e1575ec18830f7e81160cf0 with Samuel.
- * `TLS_INIT_TP_EXPENSIVE` is unused; Hurd def. can be removed.
+ * Validate our implementation against
+ <https://sourceware.org/glibc/wiki/TLSandSignals>.
# Documentation
diff --git a/open_issues/gnumach_integer_overflow.mdwn b/open_issues/gnumach_integer_overflow.mdwn
index 2166e591..08a29268 100644
--- a/open_issues/gnumach_integer_overflow.mdwn
+++ b/open_issues/gnumach_integer_overflow.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -15,3 +15,36 @@ License|/fdl]]."]]"""]]
<braunr> yes, we have integer overflows on resident_page_count, but
luckily, the member is rarely used
+
+See also [[gnumach_vm_object_resident_page_count]].
+
+
+## IRC, freenode, #hurd, 2013-06-04
+
+ <elmig> this is declared as int on vm_object.h
+ <elmig> and as it as counter it's always positive
+ <braunr> yes it should be unsigned
+ <elmig> ok
+ <braunr> but leave it as it is for consistency with the rest
+ <elmig> i send patch :)
+ <braunr> please no
+ <braunr> unless you've fully determined the side effects
+ <elmig> i've grepped the vars and saw only comparisons > and = 0
+ <elmig> never less than 0
+ <braunr> > 0 is the same
+ <braunr> well
+ <braunr> > not, but >= would be a problem
+ <elmig> http://paste.debian.net/plain/8527
+ <elmig> asctually no >=0
+ <braunr> still, i don't want to change that unless it's strictly necessary
+ <braunr> hum, you're grepping ref_count, not resident_page_count
+ <elmig> i did both
+ <elmig> on resident_page_count theres resident_page_count >= 0
+ <elmig> = 0, == 0
+ <braunr> this isn't the only possible issue
+ <braunr> anyway
+ <braunr> for now there is no reason to change anything unless you do a full
+ review
+ <elmig> only place i see resdent_page_count and page_count being decreased
+ it's on vm/vm_resident.c
+ <elmig> vm_page_remove() and vm_page_replace()
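+
+Not gnumach code, just an illustration of the point made above: once the
+counter is unsigned, any `>= 0` comparison is always true, so a check
+written that way can no longer catch an underflow.
+
+    #include <assert.h>
+
+    static unsigned int resident_page_count;    /* was: int */
+
+    static void
+    page_removed (void)
+    {
+      resident_page_count--;             /* 0 wraps around to UINT_MAX...  */
+      assert (resident_page_count >= 0); /* ...and this can never fire.  */
+    }
+
+    int
+    main (void)
+    {
+      page_removed ();
+      return 0;
+    }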
diff --git a/open_issues/gnumach_vm_object_resident_page_count.mdwn b/open_issues/gnumach_vm_object_resident_page_count.mdwn
index e6c7897f..2ffe5753 100644
--- a/open_issues/gnumach_vm_object_resident_page_count.mdwn
+++ b/open_issues/gnumach_vm_object_resident_page_count.mdwn
@@ -22,5 +22,33 @@ License|/fdl]]."]]"""]]
`vm/vm_object.h:vm_object:resident_page_count`.
+## IRC, freenode, #hurd, 2013-06-03
+
+ <elmig> regarding
+ https://www.gnu.org/software/hurd/open_issues/gnumach_vm_object_resident_page_count.html,
+ this is fixed. it's an int. what should happen do this page? /dev/null
+ <elmig> ?
+ <youpi> I guess so
+
+
+## IRC, freenode, #hurd, 2013-06-04
+
+ <elmig>
+ http://darnassus.sceen.net/~hurd-web/open_issues/gnumach_vm_object_resident_page_count/
+ <elmig> this is a int
+ <elmig> how to deal with the page? delete it? archive it?
+ <braunr> ?
+ <elmig> the issue originallu reported was fixed, right?
+ <braunr> i think so, yes
+ <braunr> for now at least
+ <elmig> so this stays on the open_issues on the wiki anyway?
+ <braunr> no, it should go away
+ <elmig> i dont know how to suggest deletion on the wiki
+ <braunr> don't
+ <braunr> i'll do it later
+
+
+## 2013-06-04
+
resident_page_count is now an int.
The issue is fixed.
diff --git a/open_issues/hurd_init.mdwn b/open_issues/hurd_init.mdwn
new file mode 100644
index 00000000..b0b58a70
--- /dev/null
+++ b/open_issues/hurd_init.mdwn
@@ -0,0 +1,216 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!tag open_issue_hurd]]
+
+
+# [[!message-id "20130625154749.17799.36923@thinkbox.jade-hamburg.de"]]
+
+
+## IRC, freenode, #hurd, 2013-07-22
+
+ <teythoon> ok, so back to the drawing board for the next big issue, the
+ potential proc and init merge
+ <teythoon> Roland had some harsh words for that proposal, but noone else
+ raised concerns
+ <youpi> noone else does not mean much
+ <youpi> I guess only Roland actually understands the matter
+ <youpi> so I'd tend to believe him
+ <teythoon> even though, his criticism was so superficial, he could at least
+ be a bit more specific...
+ <braunr> i agree that the argument, being simply based on vague principle,
+ isn't very convincing
+ <teythoon> so, what should I do?
+ <braunr> you can either keep them separate, or fight with roland
+ <teythoon> common braunr, I need a little more guidance in these kind of
+ social issues
+ <teythoon> a statement like this is of little use ;)
+ <braunr> that's the best i can give you
+ <teythoon> :/
+ <braunr> i have one patch "fixing" HZ on the hurd, and i even get to fight
+ about it
+ <teythoon> I understand Roland has been around forever and keeps an eye on
+ stuff
+ <teythoon> but could/would he block a patch for hurd if e.g. youpi would
+ accept it
+ <teythoon> i.e. how much control has he in practice?
+ <teythoon> me fighting with him over a patch is of little value for anyone
+ and I don't care to do so
+ <braunr> not much i suppose now
+ <braunr> but we also have to agree with the change
+ <braunr> with *real* arguments
+ <braunr> (well, if it was up to me, i'd even merge exec with proc so ..)
+ <teythoon> ok, so I whip up a patch to see how it goes in practice and
+ present it so we could talk about the issue with something to look at
+ first
+ <braunr> although maybe not ;p
+ <braunr> you'll hit the same reaction
+ <teythoon> from Roland?
+ <braunr> yes
+ <braunr> and youpi said he tends to trust what roland says
+ <braunr> so let's discuss the pros and cons a bit more
+ <teythoon> yes, but I'd honor his concerns if they were properly
+ presented. just telling me to hack on linux instead even though I think I
+ have demonstrated that I do want to work on Hurd is so childish in my
+ eyes that I do not consider that a valid argument at the moment
+ <teythoon> sure, shoot
+ <braunr> well, functionally, they're unrelated
+ <teythoon> head -n1 init/init.c
+ <teythoon> /* Start and maintain hurd core servers and system run state
+ <youpi> and thus it makes sense to make them separate, even if it does not
+ seem to bring anything useful now
+ <youpi> history has shown that it makes a bed for nice things later
+ <braunr> teythoon: that's not what proc is about
+ <teythoon> braunr: I know
+ <teythoon> braunr: that's what init is about in its own words ;)
+ <youpi> teythoon: also, "simplifying the code" is not necessarily an
+ argument that would be considered
+ <youpi> depending on the simplification
+ <youpi> linux made it all simple by using a monolithic kernel :)
+ <youpi> separating concerns is complex
+ <youpi> but in the end it usually pays off on the Hurd
+ <youpi> personally, I'd be fine with Guillem's solution, and renumbering
+ init's pid in Debian
+ <youpi> there's a pending question from Roland actually: what information
+ is exchanged between init and proc in the end?
+ <youpi> that's actually the point of the discussion: is that information
+ really big or not
+ <teythoon> I'm sorry, you lost me, where did he ask that question?
+ <pinotree> $ git grep proc_getmsgport | egrep '[0-9]' ← /hurd/init as pid 1
+ is hardcoded in few places
+ <youpi> teythoon: he didn't ask it this way, but that's the question I had
+ to be able to answer his
+ <youpi> Date: Mon, 15 Jul 2013 10:36:35 -0700 (PDT)
+ <youpi> > That's not what he said. He said there is a lot of information
+ <youpi> > propagated from init to proc, and thus the separation is
+ questionable.
+ <youpi> Are you talking about bootstrap, or what?
+ <youpi> as I haven't investigated much, I couldn't answer this
+ <youpi> pinotree: right. We could patch these in Debian
+ <teythoon> youpi: so, shall I refresh, test and refine Guillems patch and
+ resend it?
+ <youpi> it's probably an easier way
+ <teythoon> ok, I start by doing that
+
+
+## IRC, freenode, #hurd, 2013-07-25
+
+ <teythoon> pinotree: btw, there are two /sbin/init processes even with my
+ hacked up init/proc variant where /sbin/init gets to be pid 1
+ <pinotree> never seen that
+ <pinotree> what are their parents?
+ <teythoon> pinotree: well, pid 1 is /sbin/init now, pid 13 or something has
+ the parent 1
+ <teythoon> looks like init forks or something
+ <pinotree> i guess your sysvinit is compiled without INITDEBUG?
+ <pinotree> nothing in syslog either?
+ <teythoon> pinotree: it's compiled like the sysvinit shipped with debian
+ <pinotree> teythoon: do you have custom additions in inittab?
+ <teythoon> pinotree: a terminal for my serial console
+ <teythoon> *getty
+ <pinotree> are the getty started correctly for you, btw?
+ <teythoon> pinotree: yes
+ <pinotree> interesting
+ <pinotree> teythoon: back then, they were costantly respawning, with hurd's
+ getty's failing to start when exec'ed by (sysv)init
+ <pinotree> wonder what changed
+ <teythoon> pinotree: cool, magically went away then :)
+
+
+## IRC, freenode, #hurd, 2013-07-29
+
+ <teythoon> youpi: I need some feedback on the not freezing translators
+ issue, more specifically whether I understood you correctly in your mail
+ from wednesday (20130724131552.GG9576@type.bordeaux.inria.fr)
+ <teythoon> oh yeah, and I had some questions yesterday too, about rpctrace
+ and dead-name notifications, specifically why /hurd/init is not receiving
+ any for the root translator and the exec server
+ <braunr> teythoon: more details please
+ <teythoon> ok, so /hurd/init is registering for dead name notifications for
+ essential tasks
+ <teythoon> the rootfs and exec both register as essential tasks at init and
+ init requests successfully dead name notifications for them
+ <teythoon> if you e.g. kill the auth server, /hurd/init will notice and
+ crash the system
+ <teythoon> if you kill exec or the rootfs, /hurd/init does not get notified
+ <teythoon> I verified this with gdb and an subhurd
+ <teythoon> I'm puzzled by this, as the kernel is the one who sends the
+ notifications, right?
+ <braunr> yes
+ <braunr> teythoon: where is the problem ?
+ <teythoon> and it is not that the system is not sending any messages, it
+ is, I see the msgcount increase over time
+ <teythoon> braunr: dunno, as far as I can tell the kernel does not deliver
+ the notification for rootfs and exec
+ <braunr> oh
+ <teythoon> those are the two processes loaded by grub, maybe they are
+ different somehow
+ <braunr> is that affecting your work ?
+ <teythoon> no, not directly, I strayed around at the weekend, trying to
+ think of cool stuff hurd could do
+ <teythoon> youpi: I need some feedback on the not freezing translators
+ issue, more specifically whether I understood you correctly in your mail
+ from wednesday (20130724131552.GG9576@type.bordeaux.inria.fr)
+ <youpi> teythoon: ok, now I'm available for the not-freezing-translators
+ thing :)
+
+
+## IRC, freenode, #hurd, 2013-08-05
+
+ <teythoon> youpi: I'm in the process of producing a unified
+ sysvinit-as-pid1 and please-dont-kill-important-processes patch series
+ <teythoon> youpi: there is one issue with changing /hurd/inits pid, libcs
+ reboot() also assumes that it has the pid 1
+ <youpi> argl
+ <youpi> that's bad, because it's then an ABI, not just an internal thing
+ <teythoon> hardcoding the pid is the worst way of getting a handle of any
+ server :/
+ <teythoon> I've been thinking to make it explicit by binding it to
+ /servers/startup or something
+ <youpi> that would be more hurdish than using a pid, yes
+ <teythoon> yes, and not only does it break the abi, but in a bad way
+ too. if the libc is updated before the hurd, the shutdown sequence is
+ broken in a way that the translators aren't synced :/
+ <teythoon> youpi: as a workaround, we could make reboot() signal both pid 1
+ and 2
+ <youpi> at worse pid 1 shouldn't get harmed by receiving a startup_reboot
+ RPC indeed
+ <teythoon> yes
+
+
+## IRC, freenode, #hurd, 2013-08-16
+
+ <teythoon> grml, the procfs hardcodes the kernels pid :/
+ <teythoon> there's always one more thing to fix...
+ <teythoon> uh, and we made pids.h a private header, so no nice constant for
+ the procfs translator :/
+ <teythoon> server lookup by hardcoding the pid should be banned...
+
+
+## IRC, freenode, #hurd, 2013-09-16
+
+ <teythoon> youpi: I'm thinking about splitting /hurd/init into /hurd/init
+ and /hurd/startup
+ <teythoon> that way, you could also merge the init as pid1 patches
+ <teythoon> that should be doable within the week
+ <youpi> that would probably be better received by Roland than merging init
+ into proc :)
+ <teythoon> yes, I suppose so :D
+ <youpi> perhaps you should start the discussion on the list about it
+ already, with just a sketch of which would do what
+ <teythoon> ok
+ <teythoon> fwiw I like the name startup b/c it speaks the startup protocol
+ <braunr> teythoon: +1 startup
+
+
+## IRC, freenode, #hurd, 2013-09-23
+
+ <teythoon> I've been hacking on init/startup, I've looked into cleaning it
+ up
diff --git a/open_issues/libc_variant_selection.mdwn b/open_issues/libc_variant_selection.mdwn
index afcd9ae0..71370a43 100644
--- a/open_issues/libc_variant_selection.mdwn
+++ b/open_issues/libc_variant_selection.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2010, 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2010, 2011, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -28,6 +29,28 @@ On Thu, Oct 07, 2010 at 11:22:46AM +0200, Samuel Thibault wrote:
> Yes, you need to copy it by hand. Same for libc0.3-i686, we just need to
> steal the cpuid code from the kfreebsd port of glibc.
+
+# IRC, freenode, #hurd, 2013-06-30
+
+ <pinotree> other than that, the hwcap system is not working for us yet,
+ right?
+ <youpi> no but we'd like to use e.g. cpuid for that
+ <youpi> like kfreebsd does
+ <pinotree> do they use cpuid for that?
+ <pinotree> i kind of lost myself in glibc's loading internals, trying to
+ find out where the hwcap bits come from
+ <youpi> on linux it comes from the kernel
+ <youpi> on kfreebsd aiui they use cpuid to figure it out from the process
+ itself
+ <pinotree> do you have any pointer to the kfreebsd way? iirc i had a look
+ in their sysdeps, but found nothing related to that
+ <youpi> it's in local-sysdeps.diff aiui
+ <youpi> +dl_platform_kfreebsd_i386_init
+ <youpi> which fills dl_hwcap
+ <youpi> called at _dl_sysdep_start
+ <pinotree> interesting
+
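+The mechanism described above, shown as a stand-alone illustration rather
+than the actual kfreebsd glibc code: the feature bits that end up in
+`dl_hwcap` can be read with `cpuid` from inside the process itself (this
+uses GCC's `<cpuid.h>` and only builds on i386/amd64).
+
+    #include <cpuid.h>
+    #include <stdio.h>
+
+    int
+    main (void)
+    {
+      unsigned int eax, ebx, ecx, edx;
+      if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+        return 1;
+      printf ("cmov: %d  sse2: %d  sse3: %d\n",
+              !!(edx & bit_CMOV), !!(edx & bit_SSE2), !!(ecx & bit_SSE3));
+      return 0;
+    }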
+
---
Having working CPUID code inside [[glibc]] is also a prerequisite for proper
diff --git a/open_issues/libmachuser_libhurduser_rpc_stubs.mdwn b/open_issues/libmachuser_libhurduser_rpc_stubs.mdwn
index 670c82cb..11bebd6e 100644
--- a/open_issues/libmachuser_libhurduser_rpc_stubs.mdwn
+++ b/open_issues/libmachuser_libhurduser_rpc_stubs.mdwn
@@ -133,3 +133,29 @@ License|/fdl]]."]]"""]]
<braunr> (imo, the mach_debug interface should be adjusted to be used with
privileged ports only)
<braunr> (well, maybe not all mach_debug RPCs)
+
+
+# `gnumach.defs`
+
+[[!message-id
+"CAEvUa7nd2LSUsMG9axCx5FeaD1aBvNxE4JMBe95b9hbpdqiLdw@mail.gmail.com"]].
+
+
+## IRC, freenode, #hurd, 2013-05-13
+
+ <braunr> youpi: what's the point of the last commit in the upstream hurd
+ repository (utils/vmstat: Use gnumach.defs from gnumach) ?
+ <braunr> or rather, i think i see the point, but then why do it only for
+ gnumach and not fot the rest ?
+ <braunr> for*
+ <youpi> most probably because nobody did it, probably
+ <braunr> aiui, it makes the hurd build process not rely on system headers
+ <youpi> (and nobody had any issue with it)
+ <braunr> well yes, that's why i'm wondering :)
+ <braunr> it looks perfectly fine to me to use system headers instead of
+ generating them
+ <youpi> ah right
+ <youpi> I thought there was actually a reason
+ <youpi> I'll revert
+ <youpi> could you answer David about it?
+ <braunr> sure
diff --git a/open_issues/libnetfs_passive_translators.mdwn b/open_issues/libnetfs_passive_translators.mdwn
new file mode 100644
index 00000000..db4c9005
--- /dev/null
+++ b/open_issues/libnetfs_passive_translators.mdwn
@@ -0,0 +1,55 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!tag open_issue_hurd]]
+
+
+# IRC, freenode, #hurd, 2013-07-15
+
+ <teythoon> is there a libnetfs based translator that supports passive
+ translators?
+ <youpi> I don't see any at the top of my head, only with active ones such
+ as hostmux
+ <teythoon> I suspected as much since as far as I can tell libnetfs lacks
+ some bits to make that even work
+ <braunr> teythoon: the problem with passive translators is persistence
+ <braunr> well, it's easy to store volatile passive translators in a
+ libnetfs server
+ <braunr> but usually, there is no backing store for them
+ <braunr> ext2fs is the only one actually providing space to store their
+ command line
+ <teythoon> sure, but at least file_get_translator needs to work so that
+ procfs can serve a mounts node
+ <braunr> silly idea but
+ <braunr> don't you want to directly add it to the procfs translator ?
+ <teythoon> no, I think it's useful on its own
+ <braunr> ok
+ <braunr> then you may need to add the required support
+ <teythoon> it even doubles as normal command line tool
+ <teythoon> yes, I almost got it... or so I hope ;)
+ <braunr> ok
+ <teythoon> also, netfs_get_translator exists, so not supporting that feels
+ like a bug to me
+ <teythoon> could also be useful for a potential devfs translator
+ <braunr> yes
+
+ <teythoon> uh, the code duplication in lib*fs is really bad :/
+ <teythoon> the code is mostly similar, though they have diverged and many
+ little things are different so diffing them is very noisy
+ <teythoon> stuff like file names or identifiers
+ <teythoon> and I cannot figure out why my shiny passive translators are not
+ started :/
+ <teythoon> % showtrans tmp/mounts
+ <teythoon> /hurd/mtab.fixed /
+ <teythoon> % wc --bytes tmp/mounts
+ <teythoon> 0 tmp/mounts
+ <teythoon> and no mtab translator around either
+
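+What `showtrans` does above, as a minimal sketch: query a node's passive
+translator record with the `file_get_translator` RPC.  This is the call a
+libnetfs-based translator has to answer for procfs' `mounts` node to work.
+
+    #include <fcntl.h>
+    #include <hurd.h>
+    #include <hurd/fs.h>
+    #include <mach.h>
+    #include <stdio.h>
+
+    int
+    main (int argc, char **argv)
+    {
+      char *trans = NULL;
+      mach_msg_type_number_t trans_len = 0;
+
+      if (argc < 2)
+        return 1;
+
+      file_t node = file_name_lookup (argv[1], O_NOTRANS, 0);
+      if (node == MACH_PORT_NULL)
+        return 1;
+
+      error_t err = file_get_translator (node, &trans, &trans_len);
+      if (err)
+        return 1;
+
+      /* The record is a NUL-separated argument vector; print its first word.  */
+      printf ("%s\n", trans);
+
+      vm_deallocate (mach_task_self (), (vm_address_t) trans, trans_len);
+      mach_port_deallocate (mach_task_self (), node);
+      return 0;
+    }
+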
+[[community/gsoc/project_ideas/mtab/discussion]].
diff --git a/open_issues/libnetfs_vs_libdiskfs.mdwn b/open_issues/libnetfs_vs_libdiskfs.mdwn
new file mode 100644
index 00000000..2fcfbea5
--- /dev/null
+++ b/open_issues/libnetfs_vs_libdiskfs.mdwn
@@ -0,0 +1,118 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!tag open_issue_hurd]]
+
+[[!toc]]
+
+
+# Argument Parsing
+
+## IRC, freenode, #hurd, 2013-06-27
+
+ <teythoon> the arg parsing in libdiskfs and libnetfs differ :/
+ <teythoon> afaics libdiskfs gets it right, libnetfs does not
+ <pinotree> what do you mean?
+ <teythoon> wrt to *_std_{runtime,startup}_argp
+ <teythoon> see eg netfs.h
+ <teythoon> libdiskfs/opts-std-runtime.c:const struct argp
+ diskfs_std_runtime_argp =
+ <teythoon> libdiskfs/opts-std-runtime.c-{
+ <teythoon> libdiskfs/opts-std-runtime.c- std_runtime_options, parse_opt,
+ 0, 0, children
+ <teythoon> libdiskfs/opts-std-runtime.c-};
+ <teythoon> but
+ <teythoon> libnetfs/std-runtime-argp.c:const struct argp
+ netfs_std_runtime_argp = { 0 };
+ <pinotree> well there are no common startup/runtime options provided by
+ netfs
+ <pinotree> usually netfs-based translators put netfs_std_startup_argp as
+ child as their options, so if netfs starts providing options they would
+ work
+ <teythoon> ah
+ <pinotree> if you have a test showing issues, we can certainly look it :)
+ <teythoon> ok, m/b I was confused...
+ <pinotree> no worries, feel free to ask anytime
+ <teythoon> I thought about providing --update as common runtime flag, like
+ diskfs does, you think it's the right thing to do?
+ <pinotree> what would it do?
+ <teythoon> or should it be left for each translator to implement?
+ <teythoon> nothing by default I guess
+ <pinotree> options provided in libdiskfs are implemented and handled mostly
+ in libdiskfs itself
+ <pinotree> so imho a new option for libnetfs would be there because its
+ behaviour is implemented mostly within libnetfs itself
+ <teythoon> libdiskfs calls diskfs_reload_global_state
+ <teythoon> libnetfs could do the same, allowing translators to plug in
+ anything they wish
+ <teythoon> but I'll implement it in procfs for the time being
+ <pinotree> ah, its alias is remount
+ <teythoon> yes
+ <teythoon> I need that working for procfs
+ <teythoon> btw, I think I got your mount confusion thing figured out
+ <pinotree> but procfs has nothing to update/flush, all the information are
+ fetched at every rpc
+ <teythoon> yes
+ <teythoon> but we still need to ignore the flag
+ <teythoon> otherwise the set_options rpc fails
+ <teythoon> http://paste.debian.net/12938/
+ <teythoon> whee, remounting proc works :)
+ <braunr> :)
+
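+The pattern pinotree describes, as a minimal sketch (the option table and
+parser are placeholders, not code from any existing translator): a
+libnetfs-based translator pulls `netfs_std_startup_argp` in as an argp
+child, so any options the library grows later are picked up for free.
+
+    #include <argp.h>
+    #include <hurd/netfs.h>
+
+    static const struct argp_option options[] = { { 0 } };
+
+    static error_t
+    parse_opt (int key, char *arg, struct argp_state *state)
+    {
+      return ARGP_ERR_UNKNOWN;
+    }
+
+    static const struct argp_child argp_children[] =
+      { { &netfs_std_startup_argp }, { 0 } };
+
+    static struct argp argp =
+      { options, parse_opt, NULL, "A netfs-based translator.", argp_children };
+
+    /* main () would then hand &argp to argp_parse () before netfs_init ().  */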
+
+# IRC, freenode, #hurd, 2013-07-29
+
+ <teythoon> so, what do you folks think about refactoring libdiskfs and
+ libnetfs to be more alike?
+ <pinotree> what do you mean?
+ <teythoon> ah, I mentioned that in the context of my mtab prototype
+ 1374247519-26589-1-git-send-email-4winter@informatik.uni-hamburg.de
+ <teythoon> they are hard to diff against each other b/c they differ in file
+ names and identifier names
+ <teythoon> while working on the mtab stuff I encountered stuff that was
+ implemented in libdiskfs, but never in libnetfs
+ <teythoon> mostly support for binding translators to libnetfs nodes
+ <braunr> teythoon: sure, but looks a little out of scope
+ <teythoon> braunr: I do not mean now, more in general
+ <braunr> ok
+ <tschwinge> teythoon: I wondered about this, too. I don't know if it's
+ possible to literally merge them (and build the backend-based (libdiskfs)
+ vs. volatile-backend one (libnetfs) based on a pre-processor define or
+ similar), or just structure the source code (files) in a way such that
+ »diff -ru libdiskfs/ libnetfs/« gives meaningful results, figuratively
+ spoken.
+ <teythoon> tschwinge: my thoughts exactly
+
+
+# IRC, freenode, #hurd, 2013-08-28
+
+ <teythoon> braunr: do you think another lib*fs would be frowned uppon? I
+ like the way procfs is structured and that could be refactored and
+ generalized into a library
+ <braunr> i think we need more lib*fs libraries
+ <braunr> and better integration
+ <braunr> that's one of the strengths in linux
+ <braunr> it makes writing file systems very easy
+ <teythoon> cool :)
+ <teythoon> now we only need a snappy name, any suggestions?
+ <braunr> i don't know what you like specificlaly in procfs
+ <braunr> libpseudofs ?
+ <teythoon> well, it's not perfect, but i like the way you just have to
+ implement a function for a node and it magically gains the ability to
+ being read
+ <teythoon> for example
+ <braunr> yes i see
+ <pinotree> lacks a bit of caching though
+ <braunr> no caching for such file systems
+ <teythoon> indeed
+ <braunr> why would you want caching ?
+ <pinotree> you might have files that don't change at all, or rarely do
+ <braunr> the premise is that it's meant for files generated on the fly
+ <braunr> but are they big ?
diff --git a/open_issues/libpthread.mdwn b/open_issues/libpthread.mdwn
index e2fda122..0b426884 100644
--- a/open_issues/libpthread.mdwn
+++ b/open_issues/libpthread.mdwn
@@ -974,7 +974,7 @@ Most of the issues raised on this page has been resolved, a few remain.
<braunr> exec weights 164M eww, we definitely have to fix that leak
<braunr> the leaks are probably due to wrong mmap/munmap usage
-[[exec_leak]].
+[[exec_memory_leaks]].
### IRC, freenode, #hurd, 2012-08-29
@@ -1260,7 +1260,7 @@ Most of the issues raised on this page has been resolved, a few remain.
<braunr> i'll add traces to know which step causes the error
-### IRC, freenode, #hurd, 2012-12-11
+#### IRC, freenode, #hurd, 2012-12-11
<youpi> braunr: mktoolnix seems like a reproducer for the libports thread
priority issue
@@ -1273,7 +1273,7 @@ Most of the issues raised on this page has been resolved, a few remain.
<youpi> that's it, yes
-### IRC, freenode, #hurd, 2013-03-01
+#### IRC, freenode, #hurd, 2013-03-01
<youpi> braunr: btw, "unable to adjust libports thread priority: (ipc/send)
invalid destination port" is actually not a sign of fatality
@@ -1284,6 +1284,34 @@ Most of the issues raised on this page has been resolved, a few remain.
<braunr> weird sentence, agreed :p
+#### IRC, freenode, #hurd, 2013-06-14
+
+ <gnu_srs> Hi, when running check for gccgo the following occurs (multiple
+ times) locking up the console
+ <gnu_srs> unable to adjust libports thread priority: (ipc/send) invalid
+ destination port
+ <gnu_srs> (not locking up the console, it was just completely filled with
+ messages))
+ <braunr> gnu_srs: are you running your translator as root ?
+ <braunr> or, do you have a translator running as an unprivileged user ?
+ <braunr> hm, invalid dest port
+ <braunr> that's a bug :p
+ <braunr> but i don't know why
+ <braunr> i'll have to take some time to track it down
+ <braunr> it might be a user ref overflow or something similarly tricky
+ <braunr> gnu_srs: does it happen everytime you run gccgo checks or only
+ after the system has been alive for some time ?
+ <braunr> (some time being at least a few hours, more probably days)
+
+#### IRC, freenode, #hurd, 2013-07-05
+
+ <braunr> ok, found the bug about invalid ports when adjusting priorities
+ <braunr> thhe hurd must be plagued with wrong deallocations :(
+ <braunr> i have so many problems when trying to cleanly destroy threads
+
+[[libpthread/t/fix_have_kernel_resources]].
+
+
### IRC, freenode, #hurd, 2013-03-11
<braunr> youpi: oh btw, i noticed a problem with the priority adjustement
@@ -1296,6 +1324,171 @@ Most of the issues raised on this page has been resolved, a few remain.
<youpi> uh
<youpi> indeed
+### IRC, freenode, #hurd, 2013-07-01
+
+ <youpi> braunr: it seems as if pfinet is not prioritized enough
+ <youpi> I'm getting network connectivity issues when the system is quite
+ loaded
+ <braunr> loaded with what ?
+ <braunr> it could be ext2fs having a lot more threads than other servers
+ <youpi> building packages
+ <youpi> I'm talking about the buildds
+ <braunr> ok
+ <braunr> ironforge or others ?
+ <youpi> they're having troubles uploading packages while building stuff
+ <youpi> ironforge and others
+ <youpi> that happened already in the past sometimes
+ <youpi> but at the moment it's really pronounced
+ <braunr> i don't think it's a priority issue
+ <braunr> i think it's swapping
+ <youpi> ah, that's not impossible indeed
+ <youpi> but why would it swap?
+ <youpi> there's a lot of available memory
+ <braunr> a big file is enough
+ <braunr> it pushes anonymous memory out
+ <youpi> to fill 900MiB memory ?
+ <braunr> i see 535M of swap on if
+ <braunr> yes
+ <youpi> ironforge is just building libc
+ <braunr> and for some reason, swapping is orders of magnitude slower than
+ anything else
+ <youpi> not linking it yet
+ <braunr> i also see 1G of free memory on it
+ <youpi> that's what I meant with 900MiB
+ <braunr> so at some point, it needed a lot of memory, caused swapping
+ <braunr> and from time to time it's probably swapping back
+ <youpi> well, pfinet had all the time to swap back already
+ <youpi> I don't see why it should be still suffering from it
+ <braunr> swapping is a kernel activity
+ <youpi> ok, but once you're back, you're back
+ <youpi> unless something else pushes you out
+ <braunr> if the kernel is busy waiting for the default pager, nothing makes
+ progress
+ <braunr> (eccept the default pager hopefully)
+ <youpi> sure but pfinet should be back already, since it does work
+ <youpi> so I don't see why it should wait for something
+ <braunr> the kernel is waiting
+ <braunr> and the kernel isn't preemptibl
+ <braunr> e
+ <braunr> although i'm not sure preemption is the problem here
+ <youpi> well
+ <youpi> what I don't understand is what we have changed that could have so
+ much impact
+ <youpi> the only culprit I can see is the priorities we have changed
+ recently
+ <braunr> do you mean it happens a lot more frequently than before ?
+ <youpi> yes
+ <youpi> way
+ <braunr> ok
+ <youpi> ironforge is almost unusable while building glibc
+ <youpi> I've never seen that
+ <braunr> that's weird, i don't have these problems on darnassus
+ <braunr> but i think i reboot it more often
+ <braunr> could be a scalability issue then
+ <braunr> combined with the increased priorities
+ <braunr> if is indeed running full time on the host, whereas swapping
+ issues show the cpu being almost idle
+ <braunr> loadavg is high too so i guess there are many threads
+ <braunr> 0 971 3 -20 -20 1553 305358625 866485906 523M 63M * S<o
+ ? 13hrs /hurd/ext2fs.static -A /dev/hd0s2
+ <braunr> 0 972 3 -20 -20 1434 125237556 719443981 483M 5.85M * S<o
+ ? 13hrs /hurd/ext2fs.static -A /dev/hd0s3
+ <braunr> around 1k5 each
+ <youpi> that's quite usual
+ <braunr> could be the priorities then
+ <braunr> but i'm afraid that if we lower them, the number of threads will
+ grow out of control
+ <braunr> (good thing is i'm currently working on a way to make libpthread
+ actually remove kernel resources)
+ <youpi> but the priorities should be the same in ext2fs and pfinet,
+ shouldn't they?
+ <braunr> yes but ext2 has a lot more threads than pfinet
+ <braunr> the scheduler only sees threads, we don't have a grouping feature
+ <youpi> right
+ <braunr> we also should remove priority depressing in glibc
+ <braunr> (in sched_yield)
+ <braunr> it affects spin locks
+
+ <braunr> youpi: is it normal to see priorities of 26 ?
+ <youpi> braunr: we have changed the nice factor
+ <braunr> ah, factor
+ <youpi> Mm, I'm however realizing the gnumach kernel running these systems
+ hasn't been upgraded in a while
+ <youpi> it may not even have the needed priority levels
+ <braunr> ar euare you using top right now on if ?
+ <braunr> hm no i don't see it any more
+ <braunr> well yes, could be the priorities ..
+ <youpi> I've rebooted with an upgraded kernel
+ <youpi> no issue so far
+ <youpi> package uploads will tell me on the long run
+ <braunr> i bet it's also a scalability issue
+ <youpi> but why would it appear now only?
+ <braunr> until the cache and other data containers start to get filled,
+ processing is fast enough that we don't see it hapenning
+ <youpi> sure, but I haven't seen that in the past
+ <braunr> oh it's combined with the increased priorities
+ <youpi> even after a week building packages
+ <braunr> what i mean is, increased priorities don't affect much if threads
+ porcess things fast
+ <braunr> things get longer with more data, and then increased prioritis
+ give more time to these threads
+ <braunr> and that's when the problem appears
+ <youpi> but increased priorities give more time to the pfinet threads too,
+ don't they?
+ <braunr> yes
+ <youpi> so what is different ?
+ <braunr> but again, there are a lot more threads elsewhere
+ <braunr> with a lot more data to process
+ <youpi> sure, but that has alwasy been so
+ <braunr> hm
+ <youpi> really, 1k5 threads does not surprise me at all :)
+ <youpi> 10k would
+ <braunr> there aren't all active either
+ <youpi> yes
+ <braunr> but right, i don't know why pfinet would be given less time than
+ other threads ..
+ <braunr> compared to before
+ <youpi> particularly on xen-based buildds
+ <braunr> libpthread is slower than cthreads
+ <youpi> where it doesn't even have to wait for netdde
+ <braunr> threads need more quanta to achieve the same ting
+ <braunr> perhaps processing could usually be completed in one go before,
+ and not any more
+ <braunr> we had a discussion about this with antrik
+
+ <braunr> youpi: concerning the buildd issue, i don't think pfinet is
+ affected actually
+ <braunr> but the applications using the network may be
+ <youpi> why using the network would be a difference ?
+ <braunr> normal applications have a lower priority
+ <braunr> what i mean is, pfinet simply has nothing to do, because normal
+ applications don't have enough cpu time
+ <braunr> (what you said earlier seemed to imply pfinet had issues, i don't
+ think it has)
+ <braunr> it should be easy to test by pinging the machine while under load
+ <braunr> we should also check the priority of the special thread used to
+ handle packets, both in pfinet and netdde
+ <braunr> this one isn't spawned by libports and is likely to have a lower
+ priority as well
+
+ <braunr> youpi: you're right, something very recent slowed things down a
+ lot
+ <braunr> perhaps the new priority factor
+ <braunr> well not the factor but i suppose the priority range has been
+ increased
+
+[[nice_vs_mach_thread_priorities]].
+
+ <youpi> braunr: haven't had any upload issue so far
+ <youpi> over 20 uploads
+ <youpi> while it was usually 1 every 2 before...
+ <youpi> so it was very probably the kernel missing the priorities levels
+ <braunr> ok
+ <braunr> i think i've had the same problem on another virtual machine
+ <braunr> with a custom kernel i built a few weeks ago
+ <braunr> same kind of issue i guess
+ <braunr> it's fine now, and always was on darnassus
+
## IRC, freenode, #hurd, 2012-12-05
diff --git a/open_issues/libpthread/t/fix_have_kernel_resources.mdwn b/open_issues/libpthread/t/fix_have_kernel_resources.mdwn
index 10577c1e..6f09ea0d 100644
--- a/open_issues/libpthread/t/fix_have_kernel_resources.mdwn
+++ b/open_issues/libpthread/t/fix_have_kernel_resources.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -10,7 +10,9 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_libpthread]]
-`t/have_kernel_resources`
+`t/fix_have_kernel_resources`
+
+Address problem mentioned in [[/libpthread]], *Threads' Death*.
# IRC, freenode, #hurd, 2012-08-30
@@ -19,3 +21,395 @@ License|/fdl]]."]]"""]]
<braunr> tschwinge: i.e. the ability to tell the kernel where the stack is,
so it's unmapped when the thread dies
<braunr> which requiring another thread to perform this deallocation
+
+
+## IRC, freenode, #hurd, 2013-05-09
+
+ <bddebian> braunr: Speaking of which, didn't you say you had another "easy"
+ task?
+ <braunr> bddebian: make a system call that both terminates a thread and
+ releases memory
+ <braunr> (the memory released being the thread stack)
+ <braunr> this way, a thread can completely terminates itself without the
+ assistance of a managing thread or deferring work
+ <bddebian> braunr: That's "easy" ? :)
+ <braunr> bddebian: since it's just a thread_terminate+vm_deallocate, it is
+ <braunr> something like thread_terminate_self
+ <bddebian> But a syscall not an RPC right?
+ <braunr> in hurd terminology, we don't make the distinction
+ <braunr> the only real syscalls are mach_msg (obviously) and some to get
+ well known port rights
+ <braunr> e.g. mach_task_self
+ <braunr> everything else should be an RPC but could be a system call for
+ performance
+ <braunr> since mach was designed to support clusters, it was necessary that
+ anything not strictly machine-local was an RPC
+ <braunr> and it also helps emulation a lot
+ <braunr> so keep doing RPCs :p
+
+
+## IRC, freenode, #hurd, 2013-05-10
+
+ <braunr> i'm not sure it should only apply to self though
+ <braunr> youpi: can we get a quick opinion on this please ?
+ <braunr> i've suggested bddebian to work on a new RPC that both terminates
+ a thread and releases its stack to help fix libpthread
+ <braunr> and initially, i thought of it as operating only on the calling
+ thread
+ <braunr> do you see any reason to make it work on any thread ?
+ <braunr> (e.g. a real thread_terminate + vm_deallocate)
+ <braunr> (or any reason not to)
+ <youpi> thread stack deallocation is always a burden indeed
+ <youpi> I'd tend to think it'd be useful, but perhaps ask the list
+
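+As a sketch of what is being replaced (not the actual patch, which adds a
+combined `thread_terminate_deallocate` call): today some *other* thread has
+to reap a dead thread, because a thread cannot unmap the stack it is still
+running on and then terminate itself.  The stack base/size bookkeeping is
+assumed to be done by libpthread.
+
+    #include <mach.h>
+
+    static void
+    reap_thread (thread_t victim, vm_address_t stack_base, vm_size_t stack_size)
+    {
+      /* These two steps are what the proposed RPC folds into the kernel so
+         that a dying thread can perform them on itself.  */
+      vm_deallocate (mach_task_self (), stack_base, stack_size);
+      thread_terminate (victim);
+      /* Also drop our send right to the (now dead) thread.  */
+      mach_port_deallocate (mach_task_self (), victim);
+    }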
+
+## IRC, freenode, #hurd, 2013-06-26
+
+ <braunr> looks like there is a port right leak in libpthread
+ <braunr> grmbl, the port leak seems to come from mach_port_destroy being
+ buggy :/
+ <braunr> hum, apparently we're not the only ones to suffer from port leaks
+ wrt mach_port_destroy
+ <braunr> ew, libpthread is leaking
+ <pinotree> memory or ports?
+ <braunr> both
+ <pinotree> sounds great ;)
+ <braunr> as it is, libpthread doesn't destroy threads
+ <braunr> it queues them so they're recycled late
+ <braunr> r
+ <braunr> but there is confusion between the thread structure itself and its
+ internal resources
+ <braunr> i.e. there is pthread_alloc which allocates a thread structure,
+ and pthread_create which allocates everything else
+ <braunr> but on pthread_exit, nothing is destroyed
+ <braunr> when a thread structure is reused, its internal resources are
+ replaced by new instances
+ <pinotree> oh
+ <braunr> it's ok for joinable threads but most of our threads are detached
+ <braunr> pinotree: as expected, it's bigger than expected :p
+ <braunr> so i won't be able to write a quick fix
+ <braunr> the true way to fix this is make it possible for threads to free
+ their own resources
+ <braunr> let's do that :p
+ <braunr> ok, got the new thread termination function, i'll build eglibc
+ package providing it, then experiment with libpthread
+ <pinotree> braunr: iirc there's also a tschwinge patch in the debian eglibc
+ about that
+ <braunr> ah
+ <pinotree> libpthread_fix.diff
+ <braunr> i see
+ <braunr> thanks for the notice
+ <braunr> bddebian:
+ http://www.sceen.net/~rbraun/0001-thread_terminate_deallocate.patch
+ <braunr> bddebian: this is what it looks like
+ <braunr> see, short and easy
+ <bddebian> Aye but didn't youpi say not to bother with it??
+ <braunr> he did ?
+ <braunr> i don't remember
+ <bddebian> I thought that was the implication. Or maybe that was the one I
+ already did!?
+ <braunr> i'd be interested in reading that
+ <braunr> anyway, there still are problems in libpthread, and this call is
+ one building block to fix some of them
+ <braunr> some important ones
+ <braunr> (big leaks)
+
+
+## IRC, freenode, #hurd, 2013-06-29
+
+ <braunr> damn, i fix leaks in libpthread, only to find out leaks somewhere
+ else :(
+ <braunr> bddebian: ok, actually it was a bit more complicated than what i
+ showed you
+ <braunr> because in addition to the stack, the call must also release the
+ send right in the caller's ipc space
+ <braunr> (it can't be released before since there would be no means to
+ reference the thread to destroy)
+ <braunr> or perhaps it should strictly be reserved to self termination
+ <braunr> hmm
+ <braunr> yes it would probably be simpler
+ <braunr> but it should be a decent compromise
+ <braunr> i'm close to having a libpthread that doesn't leak anything
+ <braunr> and that properly destroys threads and their resources
+
+
+## IRC, freenode, #hurd, 2013-06-30
+
+ <braunr> bddebian: ok, it was even more tricky, because the kernel would
+ save the return value on the user stack (which is released by the call
+ and then invalid) before checking for asynchronous software traps (ASTs,
+ a kind of software interrupts in mach), and terminating the calling
+ thread is done by a deferred AST ... :)
+ <braunr> hmm, making threads able to terminate themselves makes rpctrace a
+ bit useless :/
+ <braunr> well, more restricted
+
+ <braunr> ok so, tough question :
+ <braunr> i have a small test program that creates a thread, and inspect its
+ state before any thread dies
+ <braunr> i can see msg_report_wait requests when using ps
+ <braunr> (one per thread)
+ <braunr> one of these requests creates a new receive right, apparently for
+ the second thread in the test program
+ <braunr> each time i use ps, i can see the sequence numbers of two receive
+ rights increase
+ <braunr> i guess these rights are related to proc and signal handling per
+ thread
+ <braunr> but i can't find what creates them
+ <braunr> does anyone know ?
+ <braunr> tschwing_: ^ :)
+
+ <braunr> again, too many things wrong elsewhere to cleanly destroy threads
+ ..
+ <braunr> something is deeply wrong with controlling terminals ..
+
+
+## IRC, freenode, #hurd, 2013-07-01
+
+ <braunr> youpi: if you happen to notice what receive right is created for
+ each thread (beyond the obvious port used for blocking and waking up),
+ please let me know
+ <braunr> it's the only port leak i have with thread destruction
+ <braunr> and i think it's related to the proc server since i see the
+ sequence number increase every time i use ps
+
+ <braunr> pinotree: my change doesn't fix all the pthread leaks but it's a
+ lot better
+ <braunr> bddebian: i've spent almost the whole week end trying to find the
+ last port leak without success
+ <braunr> there is some weird bug related to the controlling tty that hits
+ me every time i try to change something
+ <braunr> it's the same bug that prevents ttys from being correctly closed
+ when using ssh or screen
+ <braunr> well maybe not the same, but it's close
+ <braunr> some stale receive right kept around for no apparent reason
+ <braunr> and i can't find its source
+
+
+## IRC, freenode, #hurd, 2013-07-02
+
+ <braunr> and btw, i don't think i can make my libpthread patch work
+ <braunr> i'll just aim at avoiding leaks, but destroying threads and their
+ related resources depends on other changes i don't clearly see
+
+
+## IRC, freenode, #hurd, 2013-07-03
+
+ <braunr> grmbl, i don't want to give up thread destruction ..
+
+
+## IRC, freenode, #hurd, 2013-07-15
+
+ <braunr> btw, my work on thread destruction is currently stalled
+ <braunr> i don't have much free time right now
+
+
+## IRC, freenode, #hurd, 2013-09-13
+
+ <braunr> i think i know why my thread_terminate_deallocate patches leak one
+ receive port :>
+ <braunr> but now i'm not sure of the proper solution
+ <braunr> every time a thread is created and destroyed, a receive right is
+ leaked
+ <braunr> i guess it's simply the reply port ..
+ <braunr> grmbl
+ <braunr> i guess i have to make it a simpleroutine ...
+ <braunr> hm too bad, it's not the reply port :(
+ <braunr> it's also leaking some memory
+ <braunr> it doesn't seem related to my changes though
+ <braunr> stacks, rights, and threads are correctly destroyed
+ <braunr> some obscure state is left behind
+ <braunr> i wonder how exception ports are dealt with
+ <braunr> vminfo seems to confirm memory is leaking in the heap
+ <braunr> humpf
+ <braunr> oh silly me
+ <braunr> i don't detach threads
+ <teythoon> well, detach them ;)
+ <braunr> hm worse :p
+ <braunr> now i get additional dead names
+ <braunr> but it's a step forward
+
+
+## IRC, freenode, #hurd, 2013-09-16
+
+ <braunr> that thread port leak is so strange
+ <braunr> the leaked port seems to be created when the new thread starts
+ running
+ <braunr> so it looks like a port the kernel would implicitly create
+ <braunr> hm could it be a thread-specific reply port ?
+ <youpi> ah, yes, there is one of those
+ <braunr> how come mach/mig-reply.c in glibc isn't thread-safe ?
+ <youpi> it is overridden by sysdeps/mach/hurd/mig-reply.c I guess
+ <youpi> which uses a threadvar for the mig reply port
+ <braunr> oh
+ <youpi> talking of which, there is also last_value in
+ sysdeps/mach/strerror_l.c
+ <youpi> strerror_thread_freeres is supposed to get called, but who knows
+ <braunr> it does look to be that port
+ <youpi> iirc that's the issue which prevents from letting us make threads
+ exit on idleness?
+ <braunr> one of them
+ <youpi> ok
+ <braunr> maybe the only one, yes
+ <braunr> i see memory leaks but they could be related/normal
+ <braunr> (i.e. not actual leaks)
+ <braunr> on the other hand, i also can't boot a hurd with my patch
+ <braunr> but i consider removing such leaks a priority
+ <braunr> does anyone know the semantic difference between
+ __mig_put_reply_port and __mig_dealloc_reply_port ?
+ <braunr> i guess __mig_dealloc_reply_port is actually a destruction
+ operation, right ?
+ <youpi> AIUI, dealloc is used when one wants the port not to be reused at
+ all
+ <youpi> because it has been used as a reference for something, and can
+ still be currently in use
+ <youpi> while put_reply would be when we're really done with it, and won't
+ use it again, and can thus be used as such
+ <youpi> or at least something like that
+ <braunr> heh
+ <braunr> __mig_dealloc_reply_port calls __mach_port_mod_refs, which is a
+ RPC, and creates a new reply port when destroying the current one
+ <youpi> bah
+ <youpi> that's fine, it's a deref of the old port, which is not in the
+ reply_port variable any more
+ <braunr> it's fine, but still a leak
+ <youpi> well, dealloc does not completely deallocs, yes
+ <braunr> that's not really the problem here
+ <braunr> i've introduced a case that wasn't considered at the time, namely
+ that a thread can destroy itself
+ <youpi> we probably need another function to be called from the thread exit
+ <braunr> i'll simply try with mach_port_destroy
+ <braunr> mach_port_destroy seems to be a RPC too ...
+ <braunr> grmbl
+ <youpi> isn't there a trap version somehow ?
+ <braunr> not in libc
+ <youpi> erf
+ <braunr> at least i know what's wrong now :)
+ <braunr> there still is a small memory leak i have to investigate
+ <braunr> but outside the stack
+ <braunr> the stack, the thread name and the thread are correctly destroyed
+ <braunr> slabinfo confirms only one port leak and nothing else is leaked
+ <braunr> ok so the port leak was indeed the thread-specific reply port,
+ taken care of
+ <braunr> there are also memory leaks too
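+
+A sketch of the problematic sequence discussed above (the helpers are the
+glibc MIG support routines mentioned in the conversation; treat the details
+as an assumption rather than a specification):
+
+    /* On thread exit, the thread-specific MIG reply port must go away
+       too.  But __mig_dealloc_reply_port () releases the old port via
+       mach_port_mod_refs (), which is itself an RPC -- and sending that
+       RPC allocates a fresh reply port for the dying thread, which then
+       leaks.  Either a trap-based port destruction or having the new
+       termination call release the reply port avoids the cycle.  */
+    mach_port_t reply = __mig_get_reply_port ();
+    __mig_dealloc_reply_port (reply);   /* RPC => allocates a new reply port */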
+
+
+## IRC, freenode, #hurd, 2013-09-17
+
+ <braunr> teythoon: on my side, i'm getting to know our threading
+ implementation better
+ <braunr> closing to clean thread destruction
+ <braunr> x15 ipc will hide reply ports ;p
+ <braunr> memory leaks solved \o/
+ <braunr> now, have to fix memory release when joining
+ <braunr> proper reference counting on detach/join/exit, let's see how it
+ goes ..
+ <braunr> seems to work fine
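+
+The reference counting mentioned here could look roughly like this (a
+sketch of the idea, not the actual libpthread code; the structure field and
+release function are simplified placeholders):
+
+    /* Two references per thread: one held by the thread itself, one by
+       whoever may join it.  pthread_detach and pthread_join each drop
+       the joiner's reference, pthread_exit drops the thread's own; the
+       last reference dropped releases the kernel resources.  */
+    static void
+    thread_unref (struct __pthread *t)
+    {
+      if (__atomic_sub_fetch (&t->nr_refs, 1, __ATOMIC_ACQ_REL) == 0)
+        release_thread_resources (t);   /* stack, ports, structure */
+    }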
+
+
+## IRC, freenode, #hurd, 2013-09-18
+
+ <braunr> ok i'll soon have gnumach and libc packages including proper
+ thread destruction :>
+ <teythoon> braunr: why did you have to touch gnumach?
+ <braunr> to add a call allowing threads to release ports and memory
+ <braunr> i.e. their last self reference, their reply port and their stack
+ <braunr> let me publish my current patches
+ <teythoon> braunr: thread_commit_suicide ?
+ <braunr> hehe
+ <braunr> initially thread_terminate_self but
+ <braunr> it can be used by other threads too
+ <braunr> so i named it thread_terminate_release
+ <braunr> http://darnassus.sceen.net/~rbraun/0001-pthread_thread_halt.patch
+ <braunr>
+ http://darnassus.sceen.net/~rbraun/0001-thread_terminate_release.patch
+ <braunr> the pthread patch needs to be polished because it changes the
+ semantics of pthread_thread_halt
+ <braunr> but other than that, it should be complete
+ <pinotree> pthread_thread_halt_reallyhalt
+ <braunr> ok let's try these libc packages
+ <braunr> old static ext2fs for the root, but other than that, it boots
+ <braunr> let's try iceweasel
+ <braunr> (i'll need to build a hurd package against this new libc, removing
+ the libports_stability patch which prevents thread destruction in servers
+ on the way)
+ <teythoon> prevents thread destruction o_O
+ <braunr> yes
+ <braunr> in libports only ;p
+ <teythoon> oh, *only* in libports, I assumed for a moment that it affected
+ almost every component of the Hurd...
+ <teythoon> *phew(
+ <braunr> ... :)
+ <braunr> that's why, after a burst of messages, say because of aptitude
+ (select), you may see a few hundred threads still hanging around
+ <braunr> also why unused servers remain running even after several minutes,
+ where the normal timeout is 2mins
+ <teythoon> I wondered about that, some servers (symlink comes to mind) seem
+ to go away if unused (or that's how I read the code)
+ <braunr> symlinks are usually not servers, since most of them actually
+ exist in file systems, and are implemented through an optimization
+ <teythoon> yes I know that
+ <teythoon> trans/symlink.c reads:
+ <teythoon> /* The timeout here is 10 minutes */
+ <teythoon> err = mach_msg_server_timeout (fsys_server, 0, control,
+ <teythoon> MACH_RCV_TIMEOUT, 1000 * 60 * 10);
+ <teythoon> if (err == MACH_RCV_TIMED_OUT)
+ <teythoon> exit (0);
+ <braunr> ok
+ <teythoon> hm, /hurd/symlink doesn't feel at all like a symlink... but
+ works like one
+ <braunr> well, starting iceweasel makes X on my host freeze oO
+ <braunr> bbl
+ <teythoon> /hurd/symlink translators do go away after being unused for 10
+ minutes... this is funny if they are set up by hand instead of being
+ started from a passive translator record
+ <teythoon> magically vanishing symlinks ;)
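+
+The gnumach side of the patches linked above boils down to one new RPC,
+roughly of this shape (an interface sketch -- see the linked
+0001-thread_terminate_release.patch for the authoritative definition):
+
+    /* Terminate THREAD, then release what it could not release itself:
+       its last send right (THREAD_NAME) and its reply port in TASK's
+       IPC space, and its stack [ADDRESS, ADDRESS+SIZE) in TASK's
+       address space.  */
+    simpleroutine thread_terminate_release(
+                    thread          : thread_t;
+                    task            : task_t;
+                    thread_name     : mach_port_name_t;
+                    reply_port      : mach_port_name_t;
+                    address         : vm_address_t;
+                    size            : vm_size_t);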
+
+
+## IRC, freenode, #hurd, 2013-09-19
+
+ <braunr> hum, i can't rebuild a hurd package :(
+ <teythoon> braunr: with your thread destruction patches in libc?
+ <braunr> yes but it's unrelated
+ <braunr> In file included from ../../libdiskfs/boot-start.c:38:0:
+ <braunr> ./fsys_reply_U.h:173:15: error: conflicting types for
+ ‘fsys_get_children’
+ <braunr> i didn't see a new libc debian release
+ <teythoon> hm, David reported that as well
+ <teythoon>
+ id:CAEvUa7=QzOiS41G5Vq8k4AiaN10jAPm+CL_205OHJnL0xpJXbw@mail.gmail.com
+ <teythoon> uh oh
+ <teythoon> it seems I didn't add a _reply suffix to the reply routines :/
+ <teythoon> there's quite a bit of fallout from my patches, I kinda feel bad
+ :(
+ <braunr> teythoon: what i'm wondering is what youpi did too, since he got
+ hurd binary packages
+ <teythoon> braunr: well neither he nor I noticed that b/c for us the
+ declarations were just missing
+ <braunr> from libc you mean ?
+ <braunr> or hum gnumach-common ?
+ <teythoon> not sure actually
+ <braunr> no it's not a gnumach thing
+ <braunr> hurd-dev then
+ <teythoon> the build system should have caught these, or mig...
+ <braunr> also, i see you changed fsys_reply.defs, but nothing about
+ fsys_request.defs
+ <teythoon> I have no fsys_requests.defs
+ <braunr> looks like there was no fsys_request.defs in the first place
+ ... *sigh*
+ <braunr> do you know an application that often creates and destroys threads
+ ?
+ <teythoon> no, sorry
+ <pinotree> maybe some test suite
+ <braunr> ah right
+ <braunr> sysbench maybe
+ <braunr> also, i've been hit by a lot more network deadlocks than usual
+ lately
+ <braunr> fixing netdde has gained some priority in my todo list
+
+
+## IRC, freenode, #hurd, 2013-09-20
+
+ <braunr> oh, git is multithreaded
+ <braunr> great
+ <braunr> so i've actually tested my libpthread patch quite a lot
diff --git a/open_issues/libpthread_assertion_thread_prevp.mdwn b/open_issues/libpthread_assertion_thread_prevp.mdwn
index e8160528..f93f07d6 100644
--- a/open_issues/libpthread_assertion_thread_prevp.mdwn
+++ b/open_issues/libpthread_assertion_thread_prevp.mdwn
@@ -87,3 +87,23 @@ failed"]]
<braunr> removing the libports_stability patch exposed bugs in libpthread,
triggering assertions when queueing/dequeue threads from a queue (but i
don't know which one / in which function)
+
+
+## IRC, freenode, #hurd, 2013-06-25
+
+ <pinotree> braunr:
+ https://buildd.debian.org/status/fetch.php?pkg=libmemcached&ver=1.0.17-2&arch=hurd-i386&stamp=1372165732
+ <pinotree> make: ./pthread/pt-internal.h:122: __pthread_enqueue: Assertion
+ `thread->prevp == 0' failed. \o/
+ <pinotree> (it should rather be /o\, but better pretend not)
+ <braunr> pinotree: yes, we regularly see it
+ <braunr> pinotree: how long has the machine been running at this point ?
+ <pinotree> dunno, you should ask samuel about that
+ <pinotree> does it happen after N hours/days?
+ <braunr> a few days of moderate to high activity yes
+ <pinotree> ah ok
+ <braunr> and i actually see this error much more often when i disable the
+ libports stability patch in the hurd debian package
+ <braunr> so i guess something is wrong with thread recycling
+ <braunr> but i wanted to completely rewrite that part with the new kernel
+ call i asked bddebian to work on :)
diff --git a/open_issues/libpthread_dlopen.mdwn b/open_issues/libpthread_dlopen.mdwn
index 05a07ef2..5d574261 100644
--- a/open_issues/libpthread_dlopen.mdwn
+++ b/open_issues/libpthread_dlopen.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2011, 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2012, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -112,7 +113,18 @@ IRC, freenode, #hurd, 2011-08-17
[[packaging_libpthread]]
----
+
+# IRC, freenode, #hurd, 2013-09-03
+
+ <gnu_srs> iceweasel: ./pthread/../sysdeps/generic/pt-mutex-timedlock.c:70:
+ __pthread_mutex_timedlock_internal: Assertion `__pthread_threads' failed.
+ <pinotree> LD_PRELOAD libpthread
+ <gnu_srs> why
+ <pinotree> missing link to pthread?
+ <pinotree> and yes, it's known already, just nobody worked on solving it
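+
+The workaround suggested above, spelled out (the library path is only an
+example and depends on the installation):
+
+    $ LD_PRELOAD=/lib/i386-gnu/libpthread.so.0.3 iceweasel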
+
+
+# libthreads vs. libpthread
The same symptom appears in an odd case, for instance:
diff --git a/open_issues/llvm.mdwn b/open_issues/llvm.mdwn
index 2a4b4ed5..4da58579 100644
--- a/open_issues/llvm.mdwn
+++ b/open_issues/llvm.mdwn
@@ -97,6 +97,39 @@ a06fe9183fbffb78798a444da9bc3040fdd444aa (2013-03-23), test-suite
2012 Debian project,
<http://wiki.debian.org/SummerOfCode2012/StudentApplications/AndrejBelym>.
+ * [[sanitizers|_san]]
+
+ A lot of Linux-specific things.
+
+ * IRC, OFTC, #debian-hurd, 2013-09-05:
+
+ <gg0> how can this fix it on {kf,hurd}-i386?
+ http://anonscm.debian.org/viewvc/pkg-llvm/llvm-toolchain/branches/3.3/debian/patches/libstdc%2B%2B-header-i386.diff?view=markup&sortby=date&pathrev=830
+ <pinotree> what makes you think it does?
+ <pinotree> it fixes #714890, which has nothing to do with hurd or
+ kfreebsd
+ <gg0> i simply wouldn't add a patch that fixes it on one i386 arch
+ only, being aware there are others
+ <pinotree> meet sylvestre
+
+ * IRC, freenode, #hurd, 2013-09-05:
+
+ <pinotree> tschwinge: iirc you were working on llvm/clang, weren't you?
+ <tschwinge> pinotree: That's right. I have patches to
+ follow-up/rework. Stalled at the moment, as you probably already
+ guessed... %-)
+ <pinotree> tschwinge: <Sylvestre> by the way, pinotree if you have time
+ for hurd stuff, I would be glad to have your help to port
+ llvm-toolchain-3.3 to hurd. I am having some issues with threading
+ aspects https://paste.debian.net/35466/
+ <pinotree> he's the debian packager of llvm
+ <tschwinge> That paste is for LLDB -- which I'd not assume to be in a
+ shape usable for Hurd.
+ <tschwinge> (I didn't look at it at all.)
+ <pinotree> tschwinge: if you look at the latest llvm-toolchain-3.3
+ debian source, there's a lldb-hurd.diff patch, which starts some
+ include header dance
+
# Build
diff --git a/open_issues/mach_migrating_threads.mdwn b/open_issues/mach_migrating_threads.mdwn
index c14ce95a..bbc6ac45 100644
--- a/open_issues/mach_migrating_threads.mdwn
+++ b/open_issues/mach_migrating_threads.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -15,3 +15,89 @@ License|/fdl]]."]]"""]]
* [[microkernel/mach/memory_object/discussion]]
* [[resource_management_problems]]
+
+
+# IRC, freenode, #hurd, 2013-08-13
+
+In context of [[resource_management_problems]].
+
+ <braunr> and thread migration itself is something very confusing
+ <braunr> it's better to think of it as scheduling context inheritance
+ <teythoon> braunr: I read the paper I mentioned and then I wanted to find
+ the sources they modified
+ <teythoon> I failed
+ <teythoon> I hate scientific papers about software that fail to provide the
+ source code
+ <teythoon> that's not science imho b/c it's not reproducible
+ <braunr> i have some osf source code here
+ <braunr> i'll send it if you want
+ <teythoon> ah interesting
+ <braunr> but really, when you dive into it, thread migration is merely
+ scheduling context inheritance with kernel upcalls
+ <braunr> it's good
+ <teythoon> I searched for osf mach but google didn't turn up anything
+ <braunr> but it has nothing to do with resource accounting
+ <braunr> (well, it may help better account for cpu time actually)
+ <braunr> but that's all
+ <teythoon> why is that all? wouldn't that be transitive and could also be
+ used for i/o accounting?
+ <teythoon> also I tried to find alternative mach implementations
+ <teythoon> I wasn't terribly successful, and some sites are gone or
+ unmaintained for years :/
+ <braunr> we don't need that for io accounting
+ <braunr> thread migration is a kernel property
+ <braunr> on mach with userspace drivers, io isn't
+ <braunr> mach should only control cpu and memory
+ <braunr> and though you can account physical memory, you can't transfer
+ virtual memory accounting from one task to another
+ <teythoon> yes, but once all of those resources can be accounted to the
+ thread initiating whatever it needs doing, shouldn't that be much easier?
+ <braunr> teythoon: it's not required for that
+ <braunr> teythoon: keep in mind userspace sees activations
+ <braunr> in a thread migration enabled kernel, activations are what we
+ usually call threads, and threads are scheduling contexts
+ <teythoon> braunr: ok, so TM is not required for accounting, but surely
+ it's a good thing to have, no?
+ <braunr> teythoon: it's required for cpu accounting only
+ <braunr> which is very important :)
+ <braunr> if you look carefully, you'll see hurd servers are what use most
+ cpu
+ <braunr> there is no easy way to know which application actually uses the
+ server
+ <braunr> i personally tend to think more and more that servers *should*
+ impersonate clients
+ <braunr> TM (or rather, scheduling context inheritance) is one step
+ <braunr> it's not enough exactly because it doesn't help with resource
+ accounting
+ <braunr> teythoon:
+ ftp://ftp.mklinux.org/pub/mklinux-pre-R1/SRPMS/sources/osfmk.tar.gz
+
+
+# IRC, freenode, #hurd, 2013-09-02
+
+[[!taglink open_issue_documentation]]: move information to
+[[microkernel/mach/history]].
+
+ <teythoon> braunr: btw, I just noticed lot's of #ifdef MIGRATING_THREADS in
+ gnumach, so there was some work being done in that direction?
+ <braunr> gnumach is a fork of mach4
+ <braunr> at a stage where migration was being worked on, yes
+ <teythoon> from what I've gathered, gnumach is the only surviving mach4
+ fork, right?
+ <braunr> yes
+ <braunr> well
+ <braunr> the macos x version is probably one too
+ <braunr> i don't know
+ <teythoon> oh? I read that it was based on mach3
+ <braunr> it is
+ <braunr> i can't tell how much of mach3 versus mach4 it has, and if it's
+ relevant at all
+ <teythoon> and the osfmach, was that also based on mach4?
+ <braunr> yes
+ <teythoon> ok, fair enough
+ <braunr> that's why i think macos x is based on it too
+ <braunr> i initially downloaded osfmach sources to see an example of how
+ thread migration was used from userspace
+ <braunr> and they do have a special threading library for that
diff --git a/open_issues/magic_translator_machtype.mdwn b/open_issues/magic_translator_machtype.mdwn
index cf396d5c..3ae16cf0 100644
--- a/open_issues/magic_translator_machtype.mdwn
+++ b/open_issues/magic_translator_machtype.mdwn
@@ -24,4 +24,5 @@ License|/fdl]]."]]"""]]
Segmentation fault
tschwinge@clubber:~ $ l mach<TAB>Connection to clubber.bddebian.com closed.
-Justus: This is most likely just the shell not handling SIGLOST, see: <https://savannah.gnu.org/bugs/?19479>
+Justus: This is most likely just the shell not handling SIGLOST, see
+[[!GNU_Savannah_bug 19479]].
diff --git a/open_issues/memory_object_model_vs_block-level_cache.mdwn b/open_issues/memory_object_model_vs_block-level_cache.mdwn
index 7da5dea4..22db9b86 100644
--- a/open_issues/memory_object_model_vs_block-level_cache.mdwn
+++ b/open_issues/memory_object_model_vs_block-level_cache.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -10,6 +10,8 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_documentation open_issue_hurd open_issue_gnumach]]
+[[!toc]]
+
# IRC, freenode, #hurd, 2012-02-14
@@ -271,3 +273,242 @@ License|/fdl]]."]]"""]]
<mcsim> slpz: When mo_data_return is called, once the memory manager no
longer needs supplied data, it should be deallocated using
vm_deallocate. So this way pagers acknowledges the end of flush.
+
+
+# IRC, freenode, #hurd, 2013-08-26
+
+ < Spyro> Ok, so
+ < Spyro> idiot question: in a nutshell, what is a memory object?
+ < Spyro> and how is swapping/paging handled?
+ < braunr> Spyro: a memory object is how the virtual memory system views a
+ file
+ < braunr> so it's a sequence of bytes with a length
+ < braunr> "swapping" is just a special case of paging that applies to
+ anonymous objects
+ < braunr> (which are named so because they're not associated with a file
+ and have no name)
+ < Spyro> Who creates a memory object, and when?
+ < braunr> pagers create memory objects when needed, e.g. when you open a
+ file
+ < Spyro> and this applies both to mmap opens as well as regular I/O opens
+ as in for read() and write()?
+ < braunr> basically, all file systems capable of handling mmap requests
+ and/or caching in physical memory are pagers
+ < braunr> yes
+ < braunr> read/write will go through the page cache when possible
+ < Spyro> and who owns the page cache?
+ < Spyro> also, who decides what pages ot evict to swap/file if physical
+ memory gets tight?
+ < braunr> the kernel
+ < braunr> that's one of the things that make mach a hybrid
+ < Spyro> so the kernel owns the page cage?
+ < Spyro> ...fml
+ < Spyro> cache!
+ < braunr> yes
+
+
+## IRC, freenode, #hurd, 2013-08-27
+
+ < Spyro> so braunr: So, who creates the memory object, and how does it get
+ populated?
+ < Spyro> and how does a process accessing a file get hooked up to the
+ memory object?
+ < braunr> Spyro: i told you, pagers create memory objects
+ < braunr> memory objects are how the VM system views files, so they're
+ populated from the content of files
+ < braunr> either true files or virtual files such as in /proc
+ < braunr> Spyro: processes don't directly access memory objects unless
+ memory mapping them with vm_map()
+ < braunr> pagers (basically = file systems) do
+ <Spyro> ok, so how is a pager/fs involved in handling a fault?
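+
+For the mmap case mentioned above, the client side looks roughly like this
+(essentially what glibc's mmap does on the Hurd; error handling and port
+cleanup are omitted, and the argument values are simplified):
+
+    /* Map the first LEN bytes of an already-open file read-only.
+       FILE_PORT is the port returned by e.g. file_name_lookup ().  */
+    mach_port_t robj, wobj;
+    vm_address_t addr = 0;
+    error_t err = io_map (file_port, &robj, &wobj);
+    if (! err)
+      err = vm_map (mach_task_self (), &addr, len, 0 /* mask */,
+                    1 /* anywhere */, robj, 0 /* offset */, 1 /* copy */,
+                    VM_PROT_READ, VM_PROT_READ, VM_INHERIT_COPY);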
+
+
+## IRC, freenode, #hurd, 2013-08-28
+
+ <braunr> Spyro: each object is linked to a pager
+ <braunr> Spyro: when a fault occurs, the kernel looks up the VM map (kernel
+ or a user one), and the address in this map, then the map entry, checks
+ access and lots of other details
+ <Spyro> ok, so it's pager -> object -> vmem
+ <Spyro> ?
+ <braunr> Spyro: then finds the object mapped at that address (similar to
+ how a file is mapped with mmap)
+ <braunr> from the object, it finds the pager
+ <Spyro> ok
+ <braunr> and asks the pager about the data at the appropriate offset
+ <Spyro> so how does a user process do normal file I/O? is faulting just a
+ special case of it?
+ <braunr> it's completely separate
+ <Spyro> eww
+ <braunr> normal I/O is done with message passing
+ <braunr> the hurd io interface
+ <Spyro> ok
+ <Spyro> so who talks to who on a file I/O?
+ <braunr> a client (e.g. cat) talks to a file system server (e.g. ext2fs)
+ <Spyro> ok so
+ <Spyro> it's client to the pager for regular file I/O?
+ <braunr> Spyro: i don't understand the question
+ <braunr> Spyro: it's client to server, the server might not be a pager
+ <Spyro> ok
+ <Spyro> just trying to figure out the difference between paging/faulting
+ and regular I/O
+ <braunr> regular I/O is just message passing
+ <braunr> page fault handling is dealt with by pagers
+ <Spyro> and I have a hunch that the fs/pager is involved somehow in both,
+ because the server is the source of the data
+ <Spyro> I'm getting a headache
+ <braunr> nalaginrut: a server like ext2fs is both a file server and a pager
+ <Spyro> oh!
+ <Spyro> oh btw, does a file server make use of memory objects for caching?
+ <braunr> Spyro: yes
+ <Spyro> or rather, can it?
+ <Spyro> does it have to?
+ <braunr> memory objects are for caching, and thus for page faults
+ <braunr> Spyro: for caching, it's a requirement
+ <braunr> for I/O, it's not
+ <braunr> you could have I/O without memory objects
+ <Spyro> ok
+ <Spyro> so how does the pager/fileserver use memory objects for caching?
+ <Spyro> does it just map and write to them?
+ <braunr> basically yes but there is a complete protocol with the kernel for
+ that
+ <braunr>
+ http://www.gnu.org/software/hurd/gnumach-doc/External-Memory-Management.html#External-Memory-Management
+ <Spyro> heh, lucky guess
+ <Spyro> ty
+ <Spyro> I am in way over my head here btw
+ <Spyro> zero experience with micro kernels in practice
+ <braunr> it's not trivial
+ <braunr> that's not a microkernel thing at all
+ <braunr> that's how it works in monolithic kernels too
+ <braunr> i recommend netbsd uvm thesis
+ <braunr> there are nice pictures describing the vm system
+ <Spyro> derrr...preacious?
+ <Spyro> wow
+ <braunr> just ignore the anonymous memory handling part which is specific
+ to uvm
+ <Spyro> @_@
+ <braunr> the rest is common to practically all VM systems out there
+ <Spyro> I know about the linux page cache
+ <braunr> well it's almost the same
+ <Spyro> with memory objects being the same thing as files in a page cache?
+ <braunr> memory objects are linux "address spaces"
+ <braunr> and address spaces are how the linux mm views a file, yes
+ <Spyro> derp
+ <Spyro> ...
+ <Spyro> um...
+ <braunr> struct vm_page == struct page
+ * Spyro first must learn what an address_space is
+ <braunr> struct vm_map == struct mm_struct
+ <braunr> struct vm_map_entry == struct vm_area_struct
+ * Spyro isn't a linux kernel vm expert either
+ <braunr> struct vm_object == struct address_space
+ <braunr> roughly
+ <braunr> details vary a lot
+ <Spyro> and what's an address_space ?
+ <braunr> 11:41 < braunr> and address spaces are how the linux mm views a
+ file, yes
+ <Spyro> ok
+ <braunr> see include/linux/fs.h
+ <braunr> struct address_space_operations is the pager interface
+ * Spyro should look at the linux kernel sources perhaps, unless you have an
+ easier reference
+ <Spyro> embarrassingly, RVR hired me as an editor for the linux-mm wiki
+ <Spyro> I should know this stuff
+ <braunr> see
+ http://darnassus.sceen.net/~rbraun/design_and_implementation_of_the_uvm_virtual_memory_system.pdf
+ <braunr> page 42
+ <braunr> page 66 for another nice view
+ <braunr> i wouldn't recommend using linux source as reference
+ <braunr> it's very complicated, filled with a lot of code dealing with
+ details
+ <Spyro> lmao
+ <braunr> and linux guys have a habit of choosing crappy names
+ <Spyro> I was only going to
+ <Spyro> stoppit
+ <braunr> except for "linux" and "git"
+ <Spyro> ...make me laugh any more and I'll need rib surgery
+ <braunr> laugh ?
+ <Spyro> complicated and crappy
+ <braunr> seriously, "address space" for a file is very very confusing
+ <Spyro> oh I agree with that
+ <braunr> yes, names are crappy
+ <braunr> and the code is very complicated
+ <braunr> it took me half an hour to find where readahead is done once
+ <braunr> and i'm still not sure it was the right code
+ <Spyro> so in linkern, there is an address_space for each cached file?
+ <braunr> takes me 30 seconds in netbsd ..
+ <braunr> yes
+ <Spyro> eww
+ <Spyro> yeah, BAD name
+ <Spyro> but thanks for the explanation
+ <Spyro> now I finally know what an address space is
+ <braunr> many linux core developers admit they don't care much about names
+ <Spyro> so, in hurd, a memory object is to hurd, what an address_space is
+ to linux?
+ <braunr> yes
+ <braunr> not to hurd
+ <Spyro> ok
+ <braunr> to mach
+ <Spyro> you know what I mean
+ <Spyro> :P
+ <Spyro> easier than for linux I can tell you that much
+ <braunr> and the bsd vm system is a stripped version of the mach vm
+ <Spyro> ok
+ <braunr> that's why i think it's important to note it
+ <Spyro> good, I learned something about the linux vm...from the mach guys
+ <Spyro> this is funny
+ <braunr> linux did too
+ <braunr> there is a paper about linux page eviction that directly borrows
+ the mach algorithm and improves it
+ <braunr> mach is the historic motivation behind mmap on posix
+ <Spyro> oh nice!
+ <Spyro> but yes, linux picked a shitty name
+ <braunr> is all that clearer to you ?
+ <Spyro> I think that address_space connection was a magic bolt of
+ understanding
+ <braunr> and do you see how I/O and paging are mostly unrelated ?
+ <Spyro> almost
+ <Spyro> but how does a file I/O take advantage of caching by a memory
+ object?
+ <Spyro> does the file server just nudge the core for a hint?
+ <braunr> the file system copies from the memory object
+ * Spyro noddles
+ <Spyro> I think I understand a bit better now
+ <braunr> it's message passing
+ <Spyro> but I havfe too much to digest already
+ <braunr> memory copying
+ <braunr> if the memory is already there, good, if not, the kernel will ask
+ the file system to bring the data
+ <braunr> if message passing uses zero copy, data retrieval can be deferred
+ until the client actually accesses it
+ <Spyro> which is a fancy way of saying demand paging? :P
+ <braunr> it's always demand paging
+ <braunr> what i mean is that the file system won't fetch data as soon as it
+ copies memory
+ <braunr> but when this data is actually needed by the client
+ <Spyro> uh...
+ <Spyro> what's a precious page?
+ <braunr> let me check quickly
+ <braunr> If precious is FALSE, the kernel treats the data as a temporary
+ and may throw it away if it hasn't been changed. If the precious value is
+ TRUE, the kernel treats its copy as a data repository and promises to
+ return it to the manager
+ <braunr> basically, it's used when you want the kernel to keep cached data
+ in memory
+ <braunr> the cache becomes a lossless container for such pages
+ <braunr> the kernel may flush them, but not evict them
+ <Spyro> what's the difference?
+ <braunr> imagine a ramfs
+ <Spyro> point made
+ <braunr> ok
+ <Spyro> would be pretty hard to flush something that doesn't have a backing
+ store
+ <braunr> that was quick :)
+ <braunr> well
+ <braunr> the normal backing store for anonymous memory is the default pager
+ <braunr> aka swap
+ <Spyro> eww
+ <braunr> but if you want your data *either* in swap or in memory and never
+ in both
+ <braunr> it may be useful
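+
+In practice, Hurd file systems do not speak the external memory management
+protocol by hand; they use libpager and implement callbacks.  Supplying the
+data for a page fault then boils down to something like this (a simplified
+sketch -- `read_from_store` is a stand-in for the translator's actual
+backing-store I/O, and real pagers such as ext2fs handle errors and locking):
+
+    /* Called by libpager when the kernel requests the page at offset PAGE
+       of the memory object.  Return a freshly allocated page in *BUF.  */
+    error_t
+    pager_read_page (struct user_pager_info *pager, vm_offset_t page,
+                     vm_address_t *buf, int *write_lock)
+    {
+      error_t err = vm_allocate (mach_task_self (), buf, vm_page_size, 1);
+      if (err)
+        return err;
+      *write_lock = 0;   /* don't write-protect the page */
+      return read_from_store (*buf, page, vm_page_size);   /* hypothetical */
+    }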
diff --git a/open_issues/mig_portable_rpc_declarations.mdwn b/open_issues/mig_portable_rpc_declarations.mdwn
index 084d7454..91838f60 100644
--- a/open_issues/mig_portable_rpc_declarations.mdwn
+++ b/open_issues/mig_portable_rpc_declarations.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -56,3 +56,114 @@ License|/fdl]]."]]"""]]
<antrik> braunr: we discussed the problem of expressing structs with MIG in
the libburn thread
<antrik> (which I still need to follow up on... [sigh])
+
+
+# IRC, freenode, #hurd, 2013-06-25
+
+ <teythoon> is there a nice way to get structured data through mig that I
+ haven't found yet?
+ <teythoon> say an array of string triples
+ <braunr> no
+ <teythoon> :/
+ <braunr> but you shouldn't need that
+ <teythoon> my use case is getting info about fs translators from init to
+ procfs
+
+[[community/gsoc/project_ideas/mtab]],
+[[community/gsoc/project_ideas/mtab/discussion]].
+
+ <teythoon> should I go for an iterator like interface instead?
+ <braunr> depends
+ <braunr> how many do you need ?
+ <braunr> you could go for a variable sized array too
+ <braunr> have a look at what already exists
+ <teythoon> records, maybe 10-15, depends on how many fs translators are running
+ <braunr> a variable sized array is ok if the size isn't too big (and when i
+ say too big, i mean hundreds of MiB)
+ <braunr> an iterator is ok too if there aren't too many items
+ <braunr> you may want to combine both (i think that's what proc does)
+ <braunr> be aware that the maximum size of a message is limited to 512 MiB
+ <teythoon> yeah I saw the array[] of stuff stuff, but array[] of string_t
+ does not work, I guess b/c string_t is also an array
+ <teythoon> how would I send an array of variable length strings?
+ <braunr> i'm not sure you can
+ <braunr> or maybe out of line
+ <teythoon> somehow I expected mig to serialize arbitrary data structures,
+ maybe it's too old for that?
+ <teythoon> yeah, I read about out of line, but that seems overkill
+ <braunr> it is old yes
+ <braunr> and not very user friendly in the end
+ <braunr> let me check
+ <teythoon> we could stuff json into mig...
+ <braunr> see proc_getallpids for example
+ <braunr> we could get rid of low level serialization altogether :p
+ <teythoon> hah, exactly what I was looking at
+ <braunr> (which is what i'll do in x15)
+ <braunr> type pidarray_t = array[] of pid_t;
+ <teythoon> but that is trivial b/c its array[] of pid_t
+ <braunr> and always have the server writing guide near you
+ <teythoon> yes
+ <braunr> well, make one big string and an array of lengths :p
+ <teythoon> thought about that and said to myself, there must be a better
+ way that I haven't found yet
+ <braunr> or one big string filled with real null-terminated c strings that
+ you keep parsing until you ate all input bytes
+ <braunr> i'm almost certain there isn't
+ <braunr> type string_t = c_string[1024]; /* XXX */
+ <teythoon> yes
+ <braunr> even that isn't really variable sized
+ <teythoon> you think anyone would object to me putting a json encoder in
+ /hurd/init? it is probably better than me at serializing stuff...
+ <braunr> try with mig anyway
+ <braunr> the less dependencies we have for core stuff, the simpler it is
+ <braunr> but i agree, mig is painful
+ <teythoon> would it be too hacky if I abused the argz functions? they do
+ exactly what I'd need
+
+
+## IRC, freenode, #hurd, 2013-06-26
+
+ <teythoon> there is https://code.google.com/p/protobuf-c/ and it has a rpc
+ mechanism and I believe one could plug arbitrary transports easily
+ <braunr> please don't think about it
+ <braunr> we really don't want to add another layer of serialization
+ <braunr> it's better to completely redesign mach ipc anyway
+ <braunr> and there is a project for that :p
+ <teythoon> ive seen x15
+ <teythoon> just food for thought
+ <braunr> i've studied google protocol buffers
+ <braunr> and fyi, no, it wouldn't be easy to plug arbitrary transports on
+ top of mach
+ <braunr> there is a lot of knowledge about mach ports in mig
+
+[[community/gsoc/project_ideas/mtab]],
+[[community/gsoc/project_ideas/mtab/discussion]].
+
+ <teythoon> but again I face the challenge of serializing a arbitrary sized
+ list of arbitrary sized strings
+ <braunr> yes
+ <teythoon> list of ports is easier ;) but I think it's worthwhile
+ <teythoon> so what about abusing argz* for this? you think it's too bad a
+ hack?
+ <braunr> no since it's in glibc
+ <teythoon> awesome :)
+ <braunr> but i don't remember the details well and i'm not sure the way you
+ use it is safe
+ <teythoon> yeah, I might have got the details wrong, I hadn't had the
+ chance to test it ;)
+
+ <braunr> about this dynamic size problem
+ <braunr> a "simple" varying size array should do
+ <braunr> you can easily put all your strings in there
+ <teythoon> separated by 0?
+ <braunr> yes
+ <teythoon> that's exactly what the argz stuff does
+ <braunr> you'll get the size of the array anyway, and consume it until
+ there is no byte left
+ <braunr> good
+ <braunr> but be careful with this too
+ <braunr> since translators can be run by users, they sometimes can't be
+ trusted
+ <braunr> and even a translator running as root may behave badly
+ <braunr> so careful with parsing
+ <teythoon> noted
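+
+The "one variable-sized array of NUL-terminated strings" approach suggested
+above maps directly onto glibc's argz functions.  For instance, the
+receiving side can walk the buffer like this (the `data`/`data_len` pair is
+whatever the MIG-generated stub hands over for an `array[] of char`
+parameter):
+
+    #include <argz.h>
+    #include <stdio.h>
+
+    static void
+    print_entries (const char *data, size_t data_len)
+    {
+      /* argz_next returns each NUL-terminated element in turn, and NULL
+         once all DATA_LEN bytes have been consumed.  */
+      for (const char *e = argz_next (data, data_len, NULL); e != NULL;
+           e = argz_next (data, data_len, e))
+        printf ("%s\n", e);
+    }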
diff --git a/open_issues/mig_stub_functions.mdwn b/open_issues/mig_stub_functions.mdwn
new file mode 100644
index 00000000..24a582b1
--- /dev/null
+++ b/open_issues/mig_stub_functions.mdwn
@@ -0,0 +1,41 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!tag open_issue_mig]]
+
+[[!toc]]
+
+
+# RPC Stubs Implemented by Hand
+
+## IRC, freenode, #hurd, 2013-07-28
+
+ <teythoon> why is libfshelp/start-translator-long.c doing the fsys_startup
+ rpcs by hand instead of using the mig generated stubs?
+
+
+## IRC, freenode, #hurd, 2013-07-29
+
+ <teythoon> btw, anyone knows why libfshelp/start-translator-long.c
+ implements the fsys_startup rpc by hand?
+ <braunr> teythoon: no idea
+ <teythoon> maybe b/c of the need to specify a timeout? can one do that with
+ the mig stubs?
+ <braunr> yes
+ <braunr> select used to be implemented that way
+
+
+# Generate the Request and Reply Routines from the Synchronous Routines
+
+## IRC, freenode, #hurd, 2013-09-19
+
+ <teythoon> btw, is there any reason why mig couldn't generate the request
+ and reply routines from the synchronous routines?
+ <braunr> i guess it could
diff --git a/open_issues/mondriaan_memory_protection.mdwn b/open_issues/mondriaan_memory_protection.mdwn
new file mode 100644
index 00000000..2c7b9ba1
--- /dev/null
+++ b/open_issues/mondriaan_memory_protection.mdwn
@@ -0,0 +1,85 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+<http://scale.eecs.berkeley.edu/mondriaan/>.
+
+
+# IRC, freenode, #hurd, 2013-07-02
+
+ <xscript> in any case, what I wanted to check is if current hurd supports
+ PIE
+ <xscript`> I just saw samuel posted some fixes to have PIE working in hurd
+ <xscript`> are those included in the official image?
+ <youpi> sure
+ <youpi> it's just a trivial fixup in some address calculation code
+ <xscript> youpi: nice
+ <xscript> and does anyone know how complex it would be to implement some
+ hackish support to have non-overlapping virtual addresses for
+ applications supporting PIE?
+ <braunr> not too difficult
+ <xscript> really? I didn't expect such an answer XD
+ <xscript> I'd like to have something similar to a SASOS
+ <xscript> (single address space os)
+ <braunr> ?
+ <braunr> you mean an sasos on top of mach ?
+ <xscript> yes, but only for a few apps I want to evaluate
+ <braunr> i see
+ <xscript> the optimal would be to have all of hurd's servers on that mode
+ <braunr> you'l probably need to implement a small allocator but other than
+ that it shouldn't be too hard, yes
+ <braunr> uh ??
+ <xscript> but running on 32 bits can be a problem here
+ <braunr> and not hurdish at all
+ <xscript> what's not hurdish?
+ <braunr> we do want address space separation
+ <xscript> well, you can have multiple address spaces (page tables), but
+ without overlapping addresses between them
+ <xscript> that's exactly what I'm looking for
+ <braunr> sorry i don't see what you mean
+ <braunr> if you run several servers in the same address space, they can
+ corrupt each other
+ <braunr> we don't want that
+ <braunr> it's that simple
+ <xscript> yes, sorry, I didn't explain myself
+ <xscript> I want a separate address space on each server
+ <xscript> but I want all memory allocations to be on addresses unique to
+ the whole OS
+ <braunr> that still doesn't make sense
+ <xscript> well, it will still be secure
+ <xscript> but I know it does not make sense per se
+ <xscript> I want to do some experiments with a simulator
+ <braunr> why do you want them non overlapping if they're separate ?
+ <xscript> well, in my simulator I wouldn't need to change the page tables,
+ protection is provided through other means
+ <braunr> segmentation ?
+ <xscript> that's one possibility
+ <xscript> (small address spaces)
+ <braunr> what do you have in mind ?
+ <braunr> it wouldn't be on top of mach anyway then
+ <braunr> mach implements paging
+ <xscript> what I'm simulating is something of the likes of Mondriaan
+ (http://www.cs.utexas.edu/~witchel/pubs/mmp-asplos2002.pdf)
+ <xscript> paging is ok for me
+ <braunr> 19:06 < xscript> well, in my simulator I wouldn't need to change
+ the page tables, protection is provided through other means
+ <braunr> it didn't sound so
+ <xscript> I meant switching page tables (cr3, etc)
+ <braunr> mach does that
+ <xscript> I know, I know, I can just ignore that part for the moment
+ <braunr> ok
+ <xscript> for now, I'd like to morph hurd into a SASOS using one page table
+ per process
+ <xscript> I just wanted to know how hard that would be, without starting
+ with a full dive into the code
+ <xscript> there are other options (OSes, microkernels), but none of them
+ provides as many readily-available applications as hurd
+ <xscript> I suppose MINIX would also be easy to modify, but there's less
+ apps there, and I also would like to tamper with MIG
+ <xscript> I just wonder how hard it would be to modify MIG
diff --git a/open_issues/nightly_builds.mdwn b/open_issues/nightly_builds.mdwn
index 167e7375..96567685 100644
--- a/open_issues/nightly_builds.mdwn
+++ b/open_issues/nightly_builds.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2010, 2011, 2012 Free Software Foundation,
+[[!meta copyright="Copyright © 2010, 2011, 2012, 2013 Free Software Foundation,
Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
@@ -25,8 +25,13 @@ Resources:
* <http://hudson-ci.org/>, <http://jenkins-ci.org/>
+ * [[!message-id "201308251648.38010.holger@layer-acht.org"]]
+
* <http://buildbot.net/>
+ * [LAVA (Linaro Automated Validation
+ Architecture)](http://lava.readthedocs.org/)
+
---
See also [[nightly_builds_deb_packages]].
diff --git a/open_issues/nptl.mdwn b/open_issues/nptl.mdwn
index 9ff5fb51..3c84bfb0 100644
--- a/open_issues/nptl.mdwn
+++ b/open_issues/nptl.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2010, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -10,7 +10,10 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_libpthread open_issue_glibc]]
-IRC, #hurd, 2010-07-31
+[[!toc]]
+
+
+# IRC, freenode, #hurd, 2010-07-31
<tschwinge> Other question: how difficult is a NPTL port? Futexes and some kernel interfaces for scheduling stuff etc. -- what else?
<youpi> actually NPTL doesn't _require_ futexes
@@ -26,6 +29,21 @@ IRC, #hurd, 2010-07-31
<tschwinge> ... and even less so the interfavce that actual applications are using.
<tschwinge> We'd need to evaluate which benefits NPTL would bring.
+
+# IRC, freenode, #hurd, 2013-08-05
+
+ <gnu_srs> Hi, looks like kfreebsd are now using an NPTL-based pthread
+ library: FBTL, http://lists.debian.org/debian-bsd/2013/07/msg00060.html
+ <gnu_srs> Anything of interest for porting to Hurd? See also
+ http://lists.debian.org/debian-hurd/2013/08/msg00000.html
+ <azeem> Petr could've been more verbose in his announcements
+ <pinotree> and there's
+ http://www.gnu.org/software/hurd/open_issues/nptl.html in our wiki
+ <azeem> well, it seems to work fine for kFreeBSD:
+ http://lists.debian.org/debian-bsd/2013/07/msg00134.html
+ <azeem> and http://lists.debian.org/debian-bsd/2013/07/msg00138.html
+
+
---
# Resources
diff --git a/open_issues/open_symlink.mdwn b/open_issues/open_symlink.mdwn
index 20e4a4fe..f71109a9 100644
--- a/open_issues/open_symlink.mdwn
+++ b/open_issues/open_symlink.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2012 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2012, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -10,9 +10,21 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_glibc]]
+
# IRC, freenode, #hurd, 2012-01-02
<pinotree> hm, is it a known issue that open("somesymlink", O_RDONLY |
O_NOFOLLOW) does not fail with ELOOP?
<youpi> pinotree: iirc there is code for it, maybe not the same behavior as
on linux
+
+
+## IRC, OFTC, #debian-hurd, 2013-05-08
+
+ <pinotree> the hurd issue is that Q_NOFOLLOW seems broken on symlinks, and
+ thus open(symlink, O_NOFOLLOW) doesn't fail with ELOOP
+ <youpi> I don't really see why it should fail
+ <youpi> since NOFOLLOW says not to follow the symlink
+ <pinotree> yeah, but you cannot open a symlink
+ <youpi> ah right ok
+ <youpi> interesting :)
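+
+The behaviour being discussed, as a small test (POSIX specifies ELOOP when
+O_NOFOLLOW is used and the final path component is a symbolic link):
+
+    #include <errno.h>
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    int
+    main (void)
+    {
+      unlink ("somesymlink");
+      symlink ("target", "somesymlink");
+      int fd = open ("somesymlink", O_RDONLY | O_NOFOLLOW);
+      if (fd < 0 && errno == ELOOP)
+        puts ("ok: ELOOP");
+      else
+        printf ("unexpected: fd=%d, errno=%d\n", fd, errno);
+      return 0;
+    }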
diff --git a/open_issues/profiling.mdwn b/open_issues/profiling.mdwn
index 26e6c97c..545edcf6 100644
--- a/open_issues/profiling.mdwn
+++ b/open_issues/profiling.mdwn
@@ -9,10 +9,14 @@ Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
is included in the section entitled [[GNU Free Documentation
License|/fdl]]."]]"""]]
+[[!meta title="Profiling, Tracing"]]
+
*Profiling* ([[!wikipedia Profiling_(computer_programming) desc="Wikipedia
article"]]) is a tool for tracing where CPU time is spent. This is usually
done for [[performance analysis|performance]] reasons.
+ * [[hurd/debugging/rpctrace]]
+
* [[gprof]]
Should be working, but some issues have been reported, regarding GCC spec
@@ -33,3 +37,104 @@ done for [[performance analysis|performance]] reasons.
* [[SystemTap]]
* ... or some other Linux thing.
+
+
+# IRC, freenode, #hurd, 2013-06-17
+
+ <congzhang> is it possible to develop an rpc msg analysis tool? to get a
+ clear view of the system at different levels?
+ <congzhang> hurd is a dynamic system, how can we just read logs line by line
+ <kilobug> congzhang: well, you can use rpctrace and then analyze the logs,
+ but rpctrace is quite intrusive and will slow down things (like strace or
+ similar)
+ <kilobug> congzhang: I don't know if a low-overhead solution could be made
+ or not
+ <congzhang> that's the problem
+ <congzhang> when real system run, the msg cross different server, and then
+ the debug action should not intrusive the process itself
+ <congzhang> we observe the system and analyse os
+ <congzhang> when rms chose a microkernel, it was expected to accelerate
+ progress, but it didn't
+ <congzhang> a microkernel makes debugging a little harder
+ <kilobug> well, it's not limited to microkernels, debugging/tracing is
+ intrusive and slow things down, it's an universal law of compsci
+ <kilobug> no, it makes debugging easier
+ <congzhang> I don't think so
+ <kilobug> you can gdb the various services (like ext2fs or pfinet) more
+ easily
+ <kilobug> and rpctrace isn't any worse than strace
+ <congzhang> how easy when debug lpc
+ <kilobug> lpc ?
+ <congzhang> because cross context
+ <congzhang> classic function call
+ <congzhang> when looking for the source of a bug, I don't care about
+ performance, I want to know whether it's right or wrong by design, if it
+ works as I expect
+ <congzhang> I optimize it later
+ <congzhang> I have an idea, but don't know weather it's usefull or not
+ <braunr> rpctrace is a lot less instrusive than ptrace based tools
+ <braunr> congzhang: debugging is not made hard by the design choice, but by
+ implementation details
+ <braunr> as a simple counter example, someone often cited usb development
+ on l3 being made a lot easier than on a monolithic kernel
+ <congzhang> Collect the trace information first, and then layout the msg by
+ graph, when something wrong, I focus the trouble rpc, and found what
+ happen around
+ <braunr> "by graph" ?
+ <congzhang> yes
+ <congzhang> braunr: directed graph or something similar
+ <braunr> and not caring about performance when debugging is actually stupid
+ <braunr> i've seen it on many occasions, people not being able to use
+ debugging tools because they were far too inefficient and slow
+ <braunr> why a graph ?
+ <braunr> what you want is the complete trace, taking into account cross
+ address space boundaries
+ <congzhang> yes
+ <braunr> well it's linear
+ <braunr> switching server
+ <congzhang> by independent process view it's linear
+ <congzhang> it's linear on cpu's view too
+ <congzhang> yes, I need complete trace, and dynamic control at microkernel
+ level
+ <congzhang> so, if a server crashes, then I know what the others are doing,
+ from the graph
+ <congzhang> the graph needn't be a single one; if they are not connected
+ together, sort them by time
+ <congzhang> when hurd is completely ok, some tools may help too
+ <braunr> i don't get what you want on that graph
+ <congzhang> sorry, I need a context
+ <congzhang> like uml sequence diagram, I need what happen one by one
+ <congzhang> from server's view and from the function's view
+ <braunr> that's still linear
+ <braunr> so please stop using the word graph
+ <braunr> you want a trace
+ <braunr> a simple call trace
+ <congzhang> yes, and a tool
+ <braunr> with some work gdb could do it
+ <congzhang> you mean under some microkernel infrastructure help
+ <congzhang> ?
+ <braunr> if needed
+ <congzhang> braunr: will that be easy?
+ <braunr> not too hard
+ <braunr> i've had this idea for a long time actually
+ <braunr> another reason i insist on migrating threads (or rather, binding
+ server and client threads)
+ <congzhang> braunr: that's great
+ <braunr> the current problem we have when using gdb is that we don't know
+ which server thread is handling the request of which client
+ <braunr> we can guess it
+ <braunr> but it's not always obvious
+ <congzhang> I read the talk, know some of your idea
+ <congzhang> make things happen like classic kernel, just from function
+ ,sure:)
+ <braunr> that's it
+ <congzhang> I think you and others do a lot of work to improve mach and
+ hurd, but we lack design documents and diagrams; one diagram is worth a
+ thousand words
+ <braunr> diagrams are made after the prototypes that prove they're doable
+ <braunr> i'm not a researcher
+ <braunr> and we have little time
+ <braunr> the prototype is the true spec
+ <congzhang> that's why i want to collect the trace info and show it, so you
+ can know what happened and how it happened, maybe it's just suitable for
+ newbies, hope more young hackers like it
+ <braunr> once it's done, everything else is just sugar candy around it
diff --git a/open_issues/pthread_atfork.mdwn b/open_issues/pthread_atfork.mdwn
index ac724cf0..1b656f05 100644
--- a/open_issues/pthread_atfork.mdwn
+++ b/open_issues/pthread_atfork.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -8,6 +8,13 @@ Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
is included in the section entitled [[GNU Free Documentation
License|/fdl]]."]]"""]]
-[[!tag open_issue_libpthread]]
+[[!tag open_issue_glibc open_issue_libpthread]]
-pthread_atfork is not actually implemented, making some programs fail. Code can probably be borrowed from nptl/sysdeps/unix/sysv/linux/register-atfork.c
+`pthread_atfork` is not actually implemented, making some programs fail. Code
+can probably be borrowed from `nptl/sysdeps/unix/sysv/linux/register-atfork.c`.
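+
+For reference, the interface affected programs expect (standard POSIX usage;
+the handler bodies here are only illustrative):
+
+    #include <pthread.h>
+    #include <stdlib.h>
+
+    static void prepare (void) { /* take all locks before fork() */ }
+    static void parent (void)  { /* release them again in the parent */ }
+    static void child (void)   { /* reset them in the child */ }
+
+    static void
+    register_fork_handlers (void)
+    {
+      if (pthread_atfork (prepare, parent, child) != 0)
+        abort ();
+    }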
+
+
+# IRC, OFTC, #debian-hurd, 2013-08-21
+
+ <pinotree> SRCDIR/opal/mca/memory/linux/arena.c:387: warning: warning:
+ pthread_atfork is not implemented and will always fail
diff --git a/open_issues/resource_management_problems.mdwn b/open_issues/resource_management_problems.mdwn
index 8f752d61..daf97954 100644
--- a/open_issues/resource_management_problems.mdwn
+++ b/open_issues/resource_management_problems.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2008, 2009, 2010 Free Software Foundation,
+[[!meta copyright="Copyright © 2008, 2009, 2010, 2013 Free Software Foundation,
Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
@@ -61,7 +61,8 @@ This is, of course, non-trivial to implement, and also requires changing the
SPLICE_F_GIFT
flag](http://www.kernel.org/doc/man-pages/online/pages/man2/vmsplice.2.html#DESCRIPTION).)
-IRC, freenode, #hurd, 2011-07-31
+
+## IRC, freenode, #hurd, 2011-07-31
< braunr> one of the biggest problems on the hurd is that, when a client
makes a call, kernel (and other) resources are allocated on behalf of the
@@ -75,6 +76,20 @@ IRC, freenode, #hurd, 2011-07-31
attempts)
+## IRC, freenode, #hurd, 2013-08-13
+
+In context of <https://teythoon.cryptobitch.de/posts/my-worst-week-yet/>.
+
+ <braunr> teythoon: actually, thread migration isn't required for resource
+ accounting
+
+[[Mach_migrating_threads]].
+
+ <teythoon> braunr: but it solves it for free, doesn't it?
+ <braunr> teythoon: no
+ <braunr> it's really more complicated than that
+
+
# Further Examples
* [[hurd/critique]]
@@ -83,4 +98,34 @@ IRC, freenode, #hurd, 2011-07-31
* [[translators_set_up_by_untrusted_users]], and [[pagers]]
- * [[configure max command line length]]
+ * [[configure_max_command_line_length]]
+
+
+## [[hurd/translator/exec]] server
+
+### IRC, freenode, #hurd, 2013-08-05
+
+ <teythoon> unzipping stuff in the exec server enables a dos on filesystem
+ translators
+ <teythoon> https://teythoon.cryptobitch.de/gsoc/heap/hello-1g.bz2 is
+ /hurd/hello padded with a gig of zeros, compressed with bzip2
+ <teythoon> if set as an passive translator, it stalls other requests to the
+ filesystem, at least it does if ext2fs is used
+ <braunr> teythoon: ?
+ <braunr> teythoon: what's the dos here ?
+ <teythoon> I can prevent you from doing anything with the root filesystem
+ <teythoon> I'm kind of surprised myself, maybe a lock is held during the
+ exec of the translator?
+ <teythoon> the filesystem the hello-1g.bz2 translator is bound to is
+ affected
+ <braunr> teythoon: i don't understand
+ <braunr> have you tried starting something from another file system ?
+ <braunr> the lock may simply be in the exec server itself
+ <teythoon> no, starting other things works fine
+ <teythoon> but on the other hand, a find / is stalled
+ <braunr> :/
+ <braunr> *sigh*
+ <teythoon> don't worry
+ <teythoon> there is a solution :p
+ <braunr> :)
+ <teythoon> and it only requires deleting code
diff --git a/open_issues/robustness.mdwn b/open_issues/robustness.mdwn
index 1f8aa0c6..a6b0dbfb 100644
--- a/open_issues/robustness.mdwn
+++ b/open_issues/robustness.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -10,6 +10,7 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_documentation open_issue_hurd]]
+[[!toc]]
# IRC, freenode, #hurd, 2011-11-18
@@ -32,7 +33,9 @@ License|/fdl]]."]]"""]]
<etenil> ah yeah I thought so :)
-# IRC, freenode, #hurd, 2011-11-19
+# Reincarnation Server
+
+## IRC, freenode, #hurd, 2011-11-19
<chromaticwt> will hurd ever have the equivalent of a rs server?, is that
even possible with hurd?
@@ -127,3 +130,40 @@ License|/fdl]]."]]"""]]
<spiderweb> neat, thanks
<braunr> actually it's not that old at all
<braunr> around 2007
+
+
+## IRC, freenode, #hurd, 2013-08-26
+
+ < teythoon> I came across some paper about process reincarnation and
+ created a little prototype a while back:
+ < teythoon> http://darnassus.sceen.net/gitweb/teythoon/reincarnation.git/
+ < teythoon> and I looked into restarting the exec server in case it
+ dies. the exec server is an easy target since it has no state of its own
+ < teythoon> the only problem is that there is no exec server around to
+ start a new one
+ < youpi> teythoon: there could be another exec server only used to
+ (re)start the exec server
+ < youpi> that other exec server could even be restarted by the normal exec
+ server
+ < pinotree> what about a watchdog server?
+ < teythoon> youpi: yes, I had the same idea, i actually patched /hurd/init
+ to do that, it's just not yet working
+ < pinotree> make it watch other servers (exec included), and make exec
+ watch the watchdog only
+ < teythoon> pinotree: look at my prototype, there is a watchdog server
+ < braunr> teythoon: what's the point of reincarnation without persistence ?
+ < teythoon> braunr: there is no point in reincarnation w/o persistence of
+ course
+ < teythoon> my prototype does a limited form of persistence
+ < teythoon> the point was to see whether I can mitm a translator and
+ restart it on demand and to gain more insight into the whole translator
+ mechanism
+ < braunr> teythoon: ok
+ < teythoon> braunr: see the readme, it retains state across reincarnations
+ < braunr> teythoon: how ?
+ < teythoon> braunr: the server can store a checkpoint using the
+ reincarnation_checkpoint procedure
+ < teythoon>
+ http://darnassus.sceen.net/gitweb/teythoon/reincarnation.git/blame/HEAD:/reincarnation.defshttp://darnassus.sceen.net/gitweb/teythoon/reincarnation.git/blame/HEAD:/reincarnation.defs
+ < teythoon> uh >,< sorry, pasted twice
+ < braunr> oh ok
diff --git a/open_issues/secure_file_descriptor_handling.mdwn b/open_issues/secure_file_descriptor_handling.mdwn
index 45e983a7..16c8c85c 100644
--- a/open_issues/secure_file_descriptor_handling.mdwn
+++ b/open_issues/secure_file_descriptor_handling.mdwn
@@ -1,4 +1,5 @@
-[[!meta copyright="Copyright © 2010, 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2010, 2011, 2013 Free Software Foundation,
+Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -16,7 +17,10 @@ on this, posted patches to [[mailing_lists/libc-alpha]]. This works needs to
be resumed
and finished.
----
+Add tests from Linux kernel commit messages for `t/dup3` et al.
+
+Validate completeness according to <https://wiki.freebsd.org/AtomicCloseOnExec>
+or a similar list.
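+
+As a reminder of the interfaces such tests cover, here is ordinary
+glibc-style usage of the atomic close-on-exec variants (the file name and
+descriptor number are arbitrary examples):
+
+    /* Atomic close-on-exec: set the flag at creation time instead of
+       racing with fork/exec via a separate fcntl (FD_CLOEXEC) call.  */
+    #define _GNU_SOURCE
+    #include <fcntl.h>
+    #include <unistd.h>
+    #include <sys/socket.h>
+
+    int
+    open_private_fds (int out[3])
+    {
+      out[0] = open ("/etc/hostname", O_RDONLY | O_CLOEXEC);
+      out[1] = dup3 (out[0], 100, O_CLOEXEC);   /* dup2 + close-on-exec */
+      out[2] = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+      return (out[0] < 0 || out[1] < 0 || out[2] < 0) ? -1 : 0;
+    }
+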
In <http://lwn.net/Articles/417421/> an interesting point is made: *you [may]
want some [[unix/file_descriptor]] to still be open if 'exec' fails, but you
diff --git a/open_issues/sendmsg_scm_creds.mdwn b/open_issues/sendmsg_scm_creds.mdwn
index cf0103df..d4a6126e 100644
--- a/open_issues/sendmsg_scm_creds.mdwn
+++ b/open_issues/sendmsg_scm_creds.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2010, 2011, 2012 Free Software Foundation,
+[[!meta copyright="Copyright © 2010, 2011, 2012, 2013 Free Software Foundation,
Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
@@ -11,7 +11,8 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_glibc]]
-IRC, unknown channel, unknown date.
+
+# IRC, unknown channel, unknown date
<pinotree> Credentials: s_uid 1000, c_uid 1000, c_gid 100, c_pid 2722
<pinotree> 2722: Credentials: s_uid 1000, c_uid 1000, c_gid 100, c_pid 2724
@@ -91,10 +92,80 @@ IRC, unknown channel, unknown date.
<pinotree> yep
<youpi> ok, good :)
-/!\ IRC, freenode, #hurd, 2011-08-11
+
+## IRC, freenode, #hurd, 2011-08-11
< pinotree> (but that patch is lame)
+
+## IRC, freenode, #hurd, 2013-05-09
+
+ <gnu_srs> youpi: Since you are online tonight, which authentication
+ callbacks should be used for SCM_CREDS calls?
+ <gnu_srs> I have working code and need to add this to make things
+ complete. The auth server, lib* or where?
+ <youpi> I don't understand the question
+ <gnu_srs> authentication callbacks like for SCM_RIGHTS, see
+ <gnu_srs>
+ http://www.gnu.org/software/hurd/open_issues/sendmsg_scm_creds.html
+ <youpi> I still don't understand: what are you trying to do actually?
+ <gnu_srs> solving the SCM_CREDS problems with e.g. dbus.
+ <youpi> so what is the relation with pinotree's patch on the page above?
+ <youpi> (I have no idea of the current status of all that)
+ <gnu_srs> his patch was not merged, right? have to shut down, sorry, bbl,
+ gn8
+ <pinotree> that patch was not merged since it is not in the correct place
+ <youpi> as I said, I have no idea about the status
+ <pinotree> youpi: basically, it boils down to knowing, when executing the
+ code implementing an rpc, who requested that rpc (pid, uid, gid)
+ <youpi> i.e. getting information about the reply port for instance?
+ <youpi> well that might be somehow faked
+ <youpi> (by perhaps giving another task's port as reply port)
+ <pinotree> for example (which would be the code path for SCM_CREDS), when
+ you call the socket sendmsg(), pflocal would know who did that rpc
+ and fill the auxiliary data)
+ <pinotree> s,)$,,
+ <pinotree> youpi: yes, i know about this faking issue, iirc also antrik
+ mentioned quite some time ago
+ <youpi> ok
+ <pinotree> that's one of the (imho) two issues of this
+ <pinotree> my hurd-foo is not enough to know whether there are solutions to
+ the problem above
+
+
+### IRC, freenode, #hurd, 2013-05-14
+
+ <gnu_srs> Hi, regarding SCM_CREDS, I have some working code in
+ sendmsg.c. Now I need to make a callback to authenticate the pid, uid,
+ etc
+ <gnu_srs> Where to hook call that into pflocal?
+ <gnu_srs> the auth server?
+ <gnu_srs> maybe _io_restrict_auth is the correct call to use (same as for
+ SCM_RIGHTS)?
+
+
+### IRC, freenode, #hurd, 2013-05-17
+
+ <gnu_srs> I'm working on the scm credentials right now to enable (via dbus)
+ more X window managers to work properly.
+ <gnu_srs> seems to be rather tricky:-(
+ <pochu> gnu_srs: I guess you also need SCM_CREDS, right?
+ <gnu_srs> hi pochu, that's what I'm working on, extending your SCM_RIGHTS
+ work to SCM_CREDS
+ <pinotree> that's what i did as proof, years ago?
+ <gnu_srs> it would be good to know which server calls to make, I'll be back
+ with proposals of functions to use.
+ <pinotree> there was a talk, years ago when i started with this, and few
+ days ago too
+ <pinotree> every methods has its own drawbacks, and basically so far it
+ seems that in every method the sender identity can be faked somehow
+ <gnu_srs> pinotree: Yes of course your patch was perfect, but it seemed
+ like people wanted a server acknowledgement too.
+ <pinotree> no, my patch was not perfect at all
+ <pinotree> if it was, it would have been cleaned up and sent few years ago
+ already
+
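+The receiver side that pflocal would have to serve looks roughly like the
+following; the `cmsg` iteration is standard, but the BSD-style `struct
+cmsgcred` field names are assumed here and may differ in detail in the
+Hurd's glibc headers:
+
+    /* Sketch of a client reading BSD-style SCM_CREDS ancillary data.  */
+    #include <stdio.h>
+    #include <string.h>
+    #include <sys/socket.h>
+    #include <sys/uio.h>
+
+    void
+    print_peer_creds (int fd)
+    {
+      char data[64], cbuf[CMSG_SPACE (256)];
+      struct iovec iov = { .iov_base = data, .iov_len = sizeof data };
+      struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
+                            .msg_control = cbuf,
+                            .msg_controllen = sizeof cbuf };
+
+      if (recvmsg (fd, &msg, 0) < 0)
+        return;
+
+      for (struct cmsghdr *c = CMSG_FIRSTHDR (&msg); c != NULL;
+           c = CMSG_NXTHDR (&msg, c))
+        if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_CREDS)
+          {
+            struct cmsgcred cred;
+            memcpy (&cred, CMSG_DATA (c), sizeof cred);
+            printf ("peer pid %d uid %d\n",
+                    (int) cred.cmcred_pid, (int) cred.cmcred_uid);
+          }
+    }
+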
+
---
See also [[dbus]], [[pflocal_socket_credentials_for_local_sockets]] and
diff --git a/open_issues/some_todo_list.mdwn b/open_issues/some_todo_list.mdwn
index 80592abf..48c2944d 100644
--- a/open_issues/some_todo_list.mdwn
+++ b/open_issues/some_todo_list.mdwn
@@ -42,6 +42,21 @@ From Marcus, 2002:
* Translators
* Does settrans -g work? -fg?
* Does fsysopts work? Does setting options with fsysopts work?
+
+ IRC, freenode, #hurd, 2013-05-23:
+
+ <gnu_srs> fsysopts /servers/socket/2 works but /1 gives Operation not
+ supported.
+
+ [[!taglink open_issue_hurd]].
+
+ <braunr> ah right, some servers don't implement that
+ <braunr> work around this by using showtrans
+ <braunr> fsysopts asks the server itself how it's running, usually giving
+ its command name and options
+ <braunr> showtrans asks the parent how it starts a passive translator
+ attached to the node
+ <gnu_srs> Yes showtrans works :), thanks.
* Does stat() work on all translated nodes and give proper data?
* What about chown, chmod (some translators should pass this through to the underlying node, esp in /dev!)
* Does statfs give correct data?
diff --git a/open_issues/systemd.mdwn b/open_issues/systemd.mdwn
index c23f887f..d00b3d8a 100644
--- a/open_issues/systemd.mdwn
+++ b/open_issues/systemd.mdwn
@@ -102,6 +102,939 @@ Likely there's also some other porting needed.
<braunr> just assume you can't use systemd on anything else than linux
+## IRC, OFTC, #debian-hurd, 2013-08-12
+
+ <azeem> huh, Lennart Poettering just mentioned the Hurd in his systemd talk
+ <azeem> well, in the context of you IPC in Unix sucks and kdbus
+ <azeem> s/you/how/
+ <pinotree> QED
+ <pinotree> what did you expect? :)
+ <azeem> I didn't quite get it, but he seemed to imply the Hurd was a step
+ in the right direction over Unix
+ <azeem> (which is obvious, but it wasn't obvious he had that opinion)
+
+
+## IRC, OFTC, #debian-hurd, 2013-08-13
+
+ <azeem> so cgroups seems to be most prominent thing the systemd people
+ think the Hurd lacks
+ <tschwinge> azeem: In 2010, I came to the same conclusion,
+ <http://www.gnu.org/software/hurd/open_issues/systemd.html>. ;-)
+ <azeem> heh
+ <tschwinge> I don't think of any show-stopper for implementing that -- just
+ someone to do it.
+ <youpi> azeem: which part of cgroups, like being able to kill a cgroup?
+ <youpi> it shouldn't be very hard to implement what systemd needs
+ <azeem> probably also the resource allocation etc.
+ <azeem> the questions are I guess (i) do the cgroups semantics make sense
+ from our POV and/or do we accept that cgroups is the "standard" now and
+ (ii) should systemd require concrete implementations or just the concept
+ in a more abstract sense
+ <teythoon> being the first non-Linux OS that runs systemd would be a nice
+ showcase of the Hurd's flexibility
+ <azeem> maybe upstart is less trouble
+ <pinotree> azeem: possibly
+ <azeem> teythoon: can you just include upstart in your GSOC? kthxbye
+ <pinotree> at least libnih (the library with base utilities and such used
+ by upstart) required a working file monitor (and the current
+ implementation kind of exposes a fd) and certain semantics for waitid
+ <pinotree> libnih/upstart have "just" the issue of being under CLA...
+ <azeem> pinotree: yeah, true
+ <azeem> I suggested "startup" as a name for a fork
+ <pinotree> imho there would be no strict need to fork
+ <teythoon> azeem: but upstart is a lot less interesting. last time I used
+ it it wasn't even possible to disable services in a clean way
+ <pochu> pinotree: is that still so now that Scott works for google?
+ <pinotree> pochu: yeah, since it's a Canonical CLA, not really something
+ tied to a person
+ <pinotree> (iirc)
+ <pochu> sure, but scott is the maintainer...
+ <pochu> shrug
+ <azeem> nah, scott left upstart
+ <azeem> AFAIK
+ <azeem> at least James Hunt gave a talk earlier with Steve Langasek and
+ introduced himself as the upstart maintainer
+ <azeem> also I heard in the hallway track that the upstart people are
+ somewhat interested in BSD/Hurd support as they see it as a selling point
+ against systemd
+ <pinotree> pochu: it's just like FSF CLA for GNU projects: even if the
+ maintainers/contributors change altogether, copyright assignment is still
+ FSF
+ <azeem> but their accents were kinda annoying/hard to follow so I didn't
+ follow their talk closely to see whether they brought it up
+ <azeem> pinotree: well, it's not
+ <pochu> azeem: looking at https://code.launchpad.net/libnih, I'm not sure
+ libnih has a maintainer anymore...
+ <azeem> pinotree: first off, you're not signing over the copyright with
+ their CLA, just giving them the right to relicense
+ <azeem> pinotree: but more importantly, the FSF announced in a legally
+ binding way that they will not take things non-free
+ <azeem> anyway, I'll talk to the upstart guys about libnih
+
+
+## IRC, OFTC, #debian-hurd, 2013-08-15
+
+ <azeem> btw, I talked to vorlon about upstart and the Hurd
+ <azeem> so the situation with libnih is that it is basically
+ feature-complete, but still maintained by Scott
+ <azeem> upstart is leveraging it heavily
+ <azeem> and Scott was (back in the days) against patches for porting
+ <azeem> for upstart proper, Steve said he would happily take porting
+ patches
+
+
+## IRC, freenode, #hurd, 2013-08-26
+
+ < youpi> teythoon: I tend to agree with mbanck
+ < youpi> although another thing worth considering would be adding something
+ similar to control groups
+ < youpi> AIUI, it's one of the features that systemd really requires
+ < braunr> uhg, cgroups already
+ < braunr> youpi: where is that discussion ?
+ < youpi> it was a private mail
+ < braunr> oh ok
+ < teythoon> right, so about upstart
+ < teythoon> to be blunt, I do not like upstart, though my experience with
+ it is limited and outdated
+ < braunr> that was quick :)
+ < braunr> i assume this follows your private discussion with youpi and
+ mbank ?
+ < teythoon> I used it on a like three years old ubuntu and back then it
+ couldn't do stuff that even sysvinit could do
+ < teythoon> there was not much discussion, mbank suggested that I could
+ work on upstart
+ < teythoon> b/c it might be easier to support than systemd
+ < teythoon> which might be very well true, then again what's the benefit of
+ having upstart? I'm really curious, I should perhaps read up on its
+ features
+ < pinotree> event-based, etc
+ < youpi> it is also about avoiding being pushed out just because we don't
+ support it?
+ < teythoon> yes, but otoh systemd can do amazing things, the featurelist of
+ upstart reads rather mondane in comparison
+ < youpi> I don't really have an opinion over either, apart from portability
+ of the code
+ < braunr> teythoon: the system requirements for systemd would take much
+ time to implement in comparison to what we already have
+ < braunr> i still have maksym's work on last year gsoc on my list
+ < braunr> waiting to push in the various libpager related patches first
+ < teythoon> so you guys think it's worthwile to port upstart?
+ < braunr> no idea
+ < braunr> teythoon: on another subject
+ < azeem_> teythoon: I like systemd more, but the hallway track at Debconf
+ seemed to imply most people like Upstart better except for the CLA
+ < azeem_> which I totally forgot to address
+ < youpi> CLA ?
+ < azeem_> contributor license agreement
+ < braunr> since you've now done very good progress, is your work available
+ in the form of ready-to-test debian packages ?
+ < teythoon> braunr: it is
+ < teythoon> braunr: http://teythoon.cryptobitch.de/gsoc/heap/debian/
+ < braunr> i remember urls in some of your mails
+ < braunr> ah thanks
+ < braunr> "cryptobitch" hum :)
+ < azeem_> in any case, everybody assumed either Upstart or Systemd are way
+ ahead of systemvinit
+ < braunr> sysvinit is really ancient :)
+ < azeem_> apart from the non-event-driven fundamental issue, a lot of
+ people criticized that the failure rate at writing correct init-scripts
+ appears to be too high
+ < azeem_> one of the questions brought up was whether it makes sense to
+ continue to ship/support systemvinit once a switch is made to
+ systemd/upstart for the Linux archs
+ < azeem_> systemvinit scripts might bitrot
+ < azeem_> but anyway, I don't see a switch happen anytime soon
+ < teythoon> well, did upstart gain the capability of disabling a service
+ yet?
+ < azeem_> teythoon: no idea, but apparently:
+ http://askubuntu.com/questions/19320/recommended-way-to-enable-disable-services/20347#20347
+ < teythoon> azeem_: then there is hope yet ;)
+ < azeem_> the main selling point of Upstart is that it shipped in several
+ LTS releases and is proven technology (and honestly, I don't read a lot
+ of complaints online about it)
+ < azeem_> (I don't agree that SystemD is unproven, but that is what the
+ Upstart guys implied)
+ < teythoon> am I the only one that thinks that upstart is rather
+ unimpressive?
+ * azeem_ doesn't have an opinion on it
+ < azeem> teythoon:
+ http://penta.debconf.org/dc13_schedule/events/1027.en.html has slides and
+ the video
+ < azeem> teythoon: eh, appears the link to the slides is broken, but they
+ are here:
+ http://people.canonical.com/~jhunt/presentations/debconf13/upstart-debconf-2013.pdf
+ < braunr> teythoon: actually, from the presentation, i'd tend to like
+ upstart
+ < braunr> dependency, parallelism and even runlevel compatibility flows
+ naturally from the event based model
+ < braunr> sysv compatibility is a great feature
+ < braunr> it does look simple
+ < braunr> i admit it's "unimpressive" but do we want an overkill init
+ system ?
+ < braunr> teythoon: what makes you not like it ?
+ < azeem> Lennart criticized that upstart doesn't generate events, just
+ listens to them
+ < azeem> (which is a feature, not a bug to some)
+ < braunr> azeem: ah yes, that could be a lack
+ < azeem> braunr: http://penta.debconf.org/dc13_schedule/events/983.en.html
+ was the corresponding SystemD talk by Lennart, though he hasn't posted
+ slides yet I think
+ < teythoon> braunr: well, last time I used it it was impossible to cleanly
+ disable a service
+ < teythoon> also ubuntu makes such big claims about software they develop,
+ and when you read up on them it turns out that most of the advertised
+ functionality will be implemented in the near future
+ < teythoon> then they ship software as early as possible only to say later
+ that it has proven itself for so many years
+ < teythoon> and tbh I hate to be the one that helped port upstart to hurd
+ (and maybe kfreebsd as a byproduct) and later debian chooses upstart over
+ systemd b/c it is available for all debian kernels
+ < kilobug> teythoon: ubuntu has a tendency to ship software too early when
+ it's not fully mature/stable, but that doesn't say anything about the
+ software itself
+ < pinotree> teythoon: note the same is sometimes done on fedora for young
+ technologies (eg systemd)
+ < azeem> teythoon: heh, fair enough
+ < p2-mate> braunr: I would prefer if my init doesn't use ptrace :P
+ < teythoon> p2-mate: does upstart use ptrace?
+ < p2-mate> teythoon: yes
+ < teythoon> well, then I guess there won't be an upstart for Hurd for some
+ time, no?
+ < kilobug> p2-mate: why does it use ptrace for ?
+ < p2-mate> kilobug: to find out if a daemon forked
+ < kilobug> hum I see
+ < azeem> p2-mate: the question is whether there's a Hurdish way to
+ accomplish the same
+ < p2-mate>
+ http://bazaar.launchpad.net/~upstart-devel/upstart/trunk/view/head:/init/job_process.c
+ < p2-mate> see job_process_trace_new :)
+ < kilobug> azeem: it doesn't seem too complicated to me to have a way to
+ get proc notify upstart of forks
+ < p2-mate> azeem: that's a good question. there is a linuxish way to do
+ that using cgroups
+ < azeem> right, there's a blueprint suggesting cgroups for Upstart here:
+ https://blueprints.launchpad.net/ubuntu/+spec/foundations-q-upstart-overcome-ptrace-limitations
+ < teythoon> yes, someone should create a init system that uses cgroups for
+ tracking child processes >,<
+ < teythoon> kilobug: not sure it is that easy. who enforces that proc_child
+ is used for a new process? isn't it possible to just create a new mach
+ task that has no ties to the parent process?
+ < teythoon> azeem: what do you mean by "upstart does not generate events"?
+ there are "emits X" lines in upstart service descrpitions, surely that
+ generates event X?
+ < azeem> I think the critique is that this (and those upstart-foo-bridges)
+ are bolted on, while SystemD just takes over your systems and "knows"
+ about them first-hand
+ < azeem> but as I said, I'm not the expert on this
+ < teythoon> uh, in order to install upstart one has to remove sysvinit
+ ("yes i am sure...") and it fails to bring up the network on booting the
+ machine
+ < teythoon> also, both systemd and upstart depend on dbus, so no cookie for
+ us unless that is fixed first, right?
+ < pinotree> true
+ < teythoon> well, what do you want me to do for the next four weeks?
+ < youpi> ideally you could make both upstart and systemd work on hurd-i386
+ < pinotree> both in 4 weeks?
+ < youpi> so hurd-i386 doesn't become the nasty guy that makes people tend
+ for one or the other
+ < youpi> I said "ideally"
+ < youpi> I don't really have any idea how much work is required by either
+ of the two
+ < youpi> I'd tend to think the important thing to implement is something
+ similar to control groups, so both upstart (which is supposed to use them
+ someday) and systemd can be happy about it
+ < teythoon> looks like upstarts functionality depending on ptrace is not
+ required, but can be enabled on a per service base
+ < teythoon> so a upstart port that just lacks this might be possible
+ < teythoon> youpi: the main feature of cgroups is that a process cannot
+ escape its group, no? i'm not sure how this could be implemented atop of
+ mach in a secure and robust way
+ < teythoon> b/c any process can just create mach tasks
+ < youpi> maybe we need to add a feature in mach itself, yes
+ < teythoon> ok, implementing cgroups sounds fun, I could do that
+ < youpi> azeem: are you ok with that direction?
+ < azeem> well, in general yes; however, AIUI, cgroups is being redesigned
+ upstream, no?
+ < youpi> that's why I said "something like cgroups"
+ < azeem> ah, ok
+ < youpi> we can do something simple enough to avoid design questions, and
+ that would still be enough for upstart & systemd
+ < azeem>
+ (http://www.linux.com/news/featured-blogs/200-libby-clark/733595-all-about-the-linux-kernel-cgroups-redesign)
+ btw
+ < braunr> p2-mate: upstart uses ptrace ?
+ < p2-mate> yes
+ < youpi> teythoon: and making a real survey of what needs to be fixed for
+ upstart & systemd
+ < p2-mate> see my link posted earlier
+ < braunr> ah already answered
+ < braunr> grmbl
+ < braunr> it's a simple alternative to cgroups though
+ < braunr> teythoon: dbus isn't a proble
+ < braunr> problem
+ < braunr> it's not that hard to fix
+ < youpi> well, it hasn't been fixed for a long time now :)
+ < braunr> we're being slow, that's all
+ < braunr> and interested by other things
+ < gg0> 12:58 < teythoon> btw, who is this heroxbd fellow and why has he
+ suddenly taken interest in so many debian gsoc projects?
+ < gg0> http://lists.debian.org/debian-hurd/2013/05/msg00133.html
+ < gg0> i notice nobody mentioned openrc
+ < pinotree> he's the debian student working on integrating openrc
+ < gg0> pinotree: no, the student is Bill Wang, Benda as he says is a
+ co-mentor
+ https://wiki.debian.org/SummerOfCode2013/Projects#OpenRC_init_system_in_Debian
+ < pinotree> whatever, it's still the openrc gsoc
+ < azeem> well, they wanted to look at it WRT the Hurd, did they follow-up
+ on this?
+ < gg0> btw wouldn't having openrc on hurd be interesting too?
+ < pinotree> imho not really
+ < gg0> no idea whether Bill is also trying to figure out what to do,
+ probably not
+ < azeem> somebody could ping that thread you mentioned above to see whether
+ they looked at the Hurd and/or need help/advice
+ < gg0> azeem: yeah somebody who could provide such help/advice. like.. you?
+ for instance
+ * gg0 can just paste urls
+ < azeem> they should just follow-up on-list
+
+
+## IRC, freenode, #hurd, 2013-08-28
+
+ <teythoon> anyone knows a user of cgroups that is not systemd? so far I
+ found libcg, that looks like a promising first target to port first,
+ though not surprisingly it is also somewhat linux specific
+ <taylanub> teythoon: OpenRC optionally uses cgroups IIRC.
+ <taylanub> Not mandatory because unlike systemd it actually tries (at all)
+ to be portable.
+
+
+## IRC, freenode, #hurd, 2013-09-02
+
+ <teythoon> braunr: I plan to patch gnumach so that the mach tasks keep a
+ reference to the task that created them and to make that information
+ available
+ <teythoon> braunr: is such a change acceptable?
+ <braunr> teythoon: what for ?
+ <teythoon> braunr: well, the parent relation is currently only implemented
+ in the Hurd, but w/o this information tracked by the kernel I don't see
+ how I can prevent malicious/misbehaving applications to break out of
+ cgroups
+ <teythoon> also I think this will enable us to fix the issue with tracking
+ which tasks belong to which subhurd in the long term
+ <braunr> ah cgroups
+ <braunr> yes cgroups should partly be implemented in the kernel ...
+ <braunr> teythoon: that doesn't surprise me
+ <braunr> i mean, i think it's ok
+ <braunr> the kernel should implement tasks and threads as closely as the
+ hurd (or a unix-like system) needs it
+ <teythoon> braunr: ok, cool
+ <teythoon> braunr: I made some rather small and straight forward changes to
+ gnumach, but it isn't doing what I thought it would do :/
+ <teythoon> braunr: http://paste.debian.net/33717/
+ <braunr> you added a field to task_basic_info
+ <braunr> thereby breaking the ABI
+ <teythoon> braunr: my small test program says: my task port is 1(pid 13)
+ created by task -527895648; my parent task is 31(pid 1)
+ <teythoon> braunr: no, it is not. I appended a field and these structures
+ are designed to be extendable
+ <braunr> hm
+ <braunr> ok
+ <braunr> although i'm not so sure
+ <braunr> there are macros defining the info size, depending on what you ask
+ <braunr> you may as well get garbage
+ <braunr> have you checked that ?
+ <teythoon> i initialized my struct to zero before calling mach
+ <braunr> teythoon: can you put some hardcoded value, just to make sure data
+ is correctly exported ?
+ <teythoon> braunr: right, good idea
+ <teythoon> braunr: my task port is 1(pid 13) created by task 3; my parent
+ task is 31(pid 1) -- so yes, hardcoding 3 works
+ <braunr> ok
+ <teythoon> braunr: also I gathered evidence that the convert_task_to_port
+ thing works, b/c first I did not have the task_reference call just before
+ that so the reference count was lowered (convert... consumes a reference)
+ and the parent task was destroyed
+ <teythoon> braunr: I must admit I'm a little lost. I tried to return a
+ reference to task rather than task->parent_task, but that didn't work
+ either
+ <teythoon> braunr: I feel like I'm missing something here
+ <teythoon> maybe I should get acquainted with the kernel debugger
+ <teythoon> err, the kernel debugger is not accepting any symbol names, even
+ though the binary is not stripped o_O
+ <teythoon> err, neither the kdb nor gdb attached to qemu translates
+ addresses to symbols, gdb at least translates symbols to addresses when
+ setting break points
+ <teythoon> how did anyone ever debug a kernel problem under these
+ conditions?
+ <braunr> teythoon: i'll have a look at it when i have some time
+
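+For reference, this is the caller side of the interface being discussed;
+the count argument is exactly the size macro braunr mentions, which is why
+a field appended to `struct task_basic_info` is only copied out when the
+caller also passes a larger count (plain Mach API, nothing Hurd-specific
+assumed):
+
+    #include <stdio.h>
+    #include <mach.h>
+    #include <mach/task_info.h>
+
+    int
+    main (void)
+    {
+      struct task_basic_info info;
+      mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;
+
+      if (task_info (mach_task_self (), TASK_BASIC_INFO,
+                     (task_info_t) &info, &count) != KERN_SUCCESS)
+        return 1;
+
+      /* If the kernel-side struct grew but `count' still describes the
+         old layout, any extra field is simply not copied out and the
+         caller reads uninitialized memory -- braunr's point above.  */
+      printf ("virtual size: %lu\n", (unsigned long) info.virtual_size);
+      return 0;
+    }
+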
+
+## IRC, freenode, #hurd, 2013-09-03
+
+ <teythoon> :/ I believe the startup_notify interface is ill designed... an
+ translator can defer the system shutdown indefinitely
+ <braunr> it can
+ <teythoon> that's bad
+ <braunr> yes
+ <braunr> the hurd has a general tendency to trust its "no mutual trust
+ required" principle
+ <braunr> to rely on it a bit too much
+ <teythoon> well, at least it's a privileged operation to request this kind
+ of notification, no?
+ <braunr> why ?
+ <braunr> teythoon: it normally is used mostly by privileged servers
+ <braunr> but i don't think there is any check on the recipient
+ <teythoon> braunr: b/c getting the port to /hurd/init is done via
+ proc_getmsgport
+ <braunr> teythoon: ?
+ <teythoon> braunr: well, in order to get the notifications one needs the
+ msgport of /hurd/init and getting that requires root privileges
+ <braunr> teythoon: oh ok then
+ <braunr> teythoon: what's bad with it then ?
+ <teythoon> braunr: even if those translators are somewhat trusted, they can
+ (and do) contain bugs and stall the shutdown
+ <teythoon> I think this even happened to me once, I think it was the pfinet
+ translator
+ <braunr> teythoon: how do you want it to behave instead ?
+ <teythoon> braunr: well, /hurd/init notifies the processes sequentially,
+ that seems suboptimal, better to send async notifications to all of them
+ and then to collect all the answers
+ <teythoon> braunr: if one fails to answer within a rather large time frame
+ (say 5 minutes) shutdown anyway
+ <braunr> i agree with async notifications but
+ <braunr> i don't agree with the timeout
+ <teythoon> for reference, a (voluntary) timeout of 1 minute is hardcoded in
+ /hurd/init
+ <braunr> the timeout should be a parameter
+ <braunr> it's common on large machines to have looong shutdown delays
+ <teythoon> of the notification?
+ <braunr> the answer means "ok i'm done you can shutdown"
+ <braunr> well this can take long
+ <braunr> most often, administrators simply prefer to trust their program is
+ ok and won't take longer than it needs to, even if it's long
+ <teythoon> and not answering at all causes the shutdown / reboot to fail
+ making the system hang
+ <braunr> i know
+ <teythoon> in a state where it is not easily reached if you do not have
+ access to it
+ <braunr> but since it only concerns essential servers, it should be fine
+ <braunr> essential servers are expected to behave well
+ <teythoon> it concerns servers that have requested a shutdown notification
+ <braunr> ok so no essential but system servers
+ <teythoon> essential servers are only exec, proc, /
+ <teythoon> yes
+ <braunr> the same applies
+ <pinotree> init and auth too, no?
+ <teythoon> yes
+ <braunr> you expect root not to hang himself
+ <teythoon> I do expect all software to contain bugs
+ <braunr> yes but you also expect them to provide a minimum level of
+ reliability
+ <braunr> otherwise you can just throw it all away
+ <teythoon> no, not really
+ <braunr> well
+ <teythoon> I know, that's my dilemma basically ;)
+ <braunr> if you don't trust your file system, you make frequent backups
+ <braunr> if you don't trust your shutdown code, you're ready to pull the
+ plug manually
+ <braunr> (or set a watchdog or whatever)
+ <braunr> what i mean is
+ <braunr> we should NEVER interfere with a program that is actually doing
+ its job just because it seems too long
+ <braunr> timeouts are almost never the best solution
+ <braunr> they're used only when necessary
+ <braunr> e.g. across networks
+ <braunr> it's much much much worse to interrupt a proper shutdown process
+ because it "seems too long" than just trust it behaves well 99999%%%% of
+ the time
+ <braunr> in particular because this case deals with proper data flushing,
+ which is an extremely important use case
+ <teythoon> it's hard/theoretically impossible to distinguish between taking
+ long and doing nothing
+ <braunr> it's impossible
+ <braunr> agreed
+ <braunr> => trust
+ <braunr> if you don't trust, you run real time stuff
+ <braunr> and you don't flush data on disk
+ <teythoon> ^^
+ <braunr> (which makes a lot of computer uses impossible as well)
+ <teythoon> there are only 2 people I trust, and the other one is not
+ /hurd/pfinet
+ <braunr> if this shutdown procedure is confined to the TCB, it's fine to
+ trust it goes well
+ <teythoon> tcb?
+ <braunr> trusted computing base
+ <braunr> http://en.wikipedia.org/wiki/Trusted_computing_base
+ * teythoon shudders
+ <teythoon> "trust" is used way to much these days
+ <teythoon> and I do not like the linux 2.0 ip stack to be part of our TCB
+ <braunr> basically, on a multiserver system like the hurd, the tcb is every
+ server on the path to getting a service done from a client
+ <braunr> then make it not request to be notified
+ <braunr> or make two classes of notifications
+ <braunr> because unprivileged file systems should be notified too
+ <teythoon> indeed
+ <teythoon> by the way, we should have a hurdish libnotify or something for
+ this kind of notifications
+ <braunr> but in any case, it should really be policy
+ <braunr> we should ... :)
+ <teythoon> ^^
+
+
+## IRC, freenode, #hurd, 2013-09-04
+
+ <teythoon> braunr: btw, I now believe that no server that requested
+ shutdown notifications can stall the shutdown for more than 1 minute
+ *unless* its message queue is full
+ <teythoon> so any fs should better sync within that timeframe
+ <braunr> where is this 1 min defined ?
+ <teythoon> init/init.c search for 60000
+ <braunr> ew
+ <teythoon> did I just find the fs corruption bug everyone was looking for?
+ <braunr> no
+ <braunr> what corruption bug ?
+ <teythoon> not sure, I thought there was still some issues left with
+ unclean filesystems every now and then
+ <teythoon> *causing
+ <braunr> yes but we know the reasons
+ <teythoon> ah
+ <braunr> involving some of the funniest names i've seen in computer
+ terminology :
+ <braunr> writeback causing "message floods", which in turn create "thread
+ storms" in the servers receiving them
+ <teythoon> ^^ it's usually the other way around, storms causing floods >,,
+ <braunr> teythoon: :)
+ <braunr> let's say it's a bottom-up approach
+ <teythoon> then the fix is easy, compile mach with -DMIGRATING_THREADS :)
+ <braunr> teythoon: what ?
+ <teythoon> well, that would solve the flood/storm issue, no?
+ <braunr> no
+ <braunr> the real solution is proper throttling
+ <braunr> which can stem from synchronous rpc (which is the real property we
+ want from migrating threads)
+ <braunr> but the mach writeback interface is async
+ <braunr> :p
+
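+Mach timeouts are given in milliseconds, so the hardcoded 60000 above is
+the one-minute limit being discussed.  A generic timed receive (not the
+actual /hurd/init code) looks like this:
+
+    #include <stdlib.h>
+    #include <mach.h>
+
+    kern_return_t
+    timed_receive (mach_port_t port, mach_msg_size_t size)
+    {
+      mach_msg_header_t *msg = malloc (size);
+      if (msg == NULL)
+        return KERN_RESOURCE_SHORTAGE;
+      kern_return_t kr = mach_msg (msg, MACH_RCV_MSG | MACH_RCV_TIMEOUT,
+                                   0, size, port,
+                                   60000 /* ms */, MACH_PORT_NULL);
+      /* kr is MACH_RCV_TIMED_OUT if nothing arrived within one minute.  */
+      free (msg);
+      return kr;
+    }
+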
+
+## IRC, freenode, #hurd, 2013-09-05
+
+ <braunr> teythoon: oh right, forgot about your port issue
+ <teythoon> don't worry, I figured by now that this must be a pointer
+ <teythoon> and I'm probably missing some magic that transforms this into a
+ name for the receiver
+ <teythoon> (though I "found" this function by looking at the mig
+ transformation for ports)
+ <braunr> i was wondering why you called the convert function manually
+ <braunr> instead of simply returning the task
+ <braunr> and let mig do the magic
+ <teythoon> b/c then I would have to add another ipc call, no?
+ <braunr> let me see the basic info call again
+ <braunr> my problem with this code is that it doesn't take into account the
+ ipc space of the current task
+ <braunr> which means you probably really return the ipc port
+ <braunr> the internal kernel address of the struct
+ <braunr> indeed, ipc_port_t convert_task_to_port(task)
+ <braunr> i'd personally make a new rpc instead of adding it to basic info
+ <braunr> basic info doesn't create rights
+ <braunr> what you want to achieve does
+ <braunr> you may want to make it a special port
+ <braunr> i.e. a port created at task creation time
+ <teythoon> y?
+ <braunr> it also means you need to handle task destruction and reparent
+ <teythoon> yes, I thought about that
+ <braunr> see
+ http://www.gnu.org/software/hurd/gnumach-doc/Task-Special-Ports.html#Task-Special-Ports
+ <braunr> for now you may simply turn the right into a dead name when the
+ parent dies
+ <braunr> although adding a call and letting mig do it is simpler
+ <braunr> mig handles reference counting, users just need to task_deallocate
+ once done
+ <teythoon> o_O mig does reference counting of port rights?
+ <braunr> mig/mach_msg
+ <teythoon> is there anything it *doesn't* do?
+ <braunr> i told you, it's a very complicated messaging interface
+ <braunr> coffee ?
+ <braunr> fast ?
+ <teythoon> ^^
+ <braunr> mig knows about copy_send/move_send/etc...
+ <braunr> so even if it doesn't do reference counting explicitely, it does
+ take care of that
+ <teythoon> true
+ <braunr> in addition, the magic conversions are intended to both translate
+ names into actual structs, and add a temporary reference at the same time
+ <braunr> teythoon: everything clear now ? :)
+ <teythoon> braunr: no, especially not why you suggested to create a special
+ port. but this will have to wait for tomorrow ;)
+
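+The special-port route braunr suggests would reuse an existing pattern:
+GNU Mach already has accessors like the one below, and a parent-task slot
+would be read the same way (note that a `TASK_PARENT_PORT` identifier does
+not exist; it is only imagined here for illustration):
+
+    #include <mach.h>
+    #include <mach/task_special_ports.h>
+
+    /* Existing API: fetch one of the current task's special ports.  */
+    kern_return_t
+    get_own_kernel_port (mach_port_t *port)
+    {
+      return task_get_special_port (mach_task_self (),
+                                    TASK_KERNEL_PORT, port);
+    }
+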
+
+## IRC, OFTC, #debian-hurd, 2013-09-06
+
+ <vorlon> teythoon: hi there
+ <vorlon> so I've been following your blog entries about cgroups on
+ hurd... very impressive :)
+ <vorlon> but I think there's a misunderstanding about upstart and
+ cgroups... your "conjecture" in
+ https://teythoon.cryptobitch.de/posts/what-will-i-do-next-cgroupfs-o/ is
+ incorrect
+ <vorlon> cgroups does not give us the interfaces that upstart uses to
+ define service readiness; adding support for cgroups is interesting to
+ upstart for purposes of resource partitioning, but there's no way to
+ replace ptrace with cgroups for what we're doing
+ <teythoon> vorlon: hi and thanks for the fish :)
+ <teythoon> vorlon: what is it exactly that upstart is doing with ptrace
+ then?
+ <teythoon> .,oO( your nick makes me suspicious for some reason... ;)
+ <teythoon> service readiness, what does that mean exactly?
+ <vorlon> teythoon: so upstart uses ptrace primarily for determining service
+ readiness. The idea is that traditionally, you know an init script is
+ "done" when it returns control to the parent process, which happens when
+ the service process has backgrounded/daemonized; this happens when the
+ parent process exits
+ <vorlon> in practice, however, many daemons do this badly
+ <vorlon> so upstart tries to compensate, by not just detecting that the
+ parent process has exited, but that the subprocess has exited
+ <vorlon> (for the case where the upstart job declares 'expect daemon')
+ <vorlon> cgroups, TTBOMK, will let you ask "what processes are part of this
+ group" and possibly even "what process is the leader for this group", but
+ doesn't really give you a way to detect "the lead process for this group
+ has changed twice"
+ <vorlon> now, it's *better* in an upstart/systemd world for services to
+ *not* daemonize and instead stay running in the foreground, but then
+ there's the question of how you know the service is "ready" before moving
+ on to starting other services that depend on it
+ <vorlon> systemd's answer to this is socket-based activation, which we
+ don't really endorse for upstart for a variety of reasons
+ <teythoon> hm, okay
+ <teythoon> so upstart does this only if expect daemon is declared in the
+ service description?
+ <vorlon> (in part because I've seen security issues when playing with the
+ systemd implementation on Fedora, which Lennart assures me are
+ corner-cases specific to cups, but I haven't had a chance to test yet
+ whether he's right)
+ <teythoon> and it is not used to track children, but only to observe the
+ daemonizing process?
+ <vorlon> yes
+ <teythoon> and it then detaches from the processes?
+ <vorlon> yes
+ <vorlon> once it knows the service is "ready", upstart doesn't care about
+ tracking it; it'll receive SIGCHLD when the lead process dies, and that's
+ all it needs to know
+ <teythoon> ok, so I misunderstood the purpose of the ptracing, thanks for
+ clarifying this
+ <vorlon> my pleasure :)
+ <vorlon> I realize that doesn't really help with the problem of hurd not
+ having ptrace
+ <teythoon> no, but thanks anyway
+ <vorlon> fwiw, the alternative upstart recommends for detecting service
+ readiness is for the process to raise SIGSTOP when it's ready
+ <vorlon> doesn't require ptracing, doesn't require socket-based activation
+ definitions; does require the service to run in a different mode than
+ usual where it will raise the signal at the correct time
+ <teythoon> right, but that requires patching it, same as the socket
+ activation stuff of systemd
+ <vorlon> (this is upstart's 'expect stop')
+ <vorlon> yes
+ <vorlon> though at DebConf, there were some evil ideas floating around
+ about doing this with an LD_PRELOAD or similar ;)
+ <vorlon> (overriding 'daemonize')
+ <vorlon> er, 'daemon()'
+ <teythoon> ^^
+ <vorlon> and hey, what's suspicious about my /nick? vorlons are always
+ trustworthy
+ <vorlon> ;)
+ <teythoon> sure they are
+ <teythoon> but could this functionality be reasonably #ifdef'ed out for a
+ proof of concept port?
+ <vorlon> hmm, you would need to implement some kind of replacement... if
+ you added cgroups support to upstart as an alternative
+ <vorlon> that could work
+ <vorlon> i.e., you would need upstart to know when the service has exited;
+ if you aren't using ptrace, you don't know the "lead pid" to watch for,
+ so you need some other mechanism --> cgroups
+ <vorlon> and even then, what do you do for a service like openssh, which
+ explicitly wants to leave child processes behind when it restarts?
+ <teythoon> right...
+ <vorlon> oh, I was hoping you knew the answer to this question ;) Since
+ AFAICS, openssh has no native support for cgroups
+ <teythoon> >,< I don't, but I'll think about what you've said... gotta go,
+ catch what's left of the summer ;)
+ <teythoon> fwiw I consider fork/exec/the whole daemonizing stuff fubar...
+ <teythoon> see you around :)
+ <vorlon> later :)
+
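+For context, the `expect stop' readiness protocol vorlon describes does not
+need ptrace at all; a service patched for it just raises SIGSTOP once its
+initialisation is done, and the init system resumes it with SIGCONT:
+
+    #include <signal.h>
+
+    static void
+    notify_ready (void)
+    {
+      /* upstart notes the stop and SIGCONTs the service.  */
+      raise (SIGSTOP);
+    }
+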
+
+## IRC, OFTC, #debian-hurd, 2013-09-07
+
+ <teythoon> vorlon: I thought about upstarts use of ptrace for observing the
+ daemonizing process and getting hold of the child
+ <teythoon> vorlon: what if cgroup(f)s would guarantee that the order of
+ processes listed in x/tasks is the same they were added in?
+ <teythoon> vorlon: that way, the first process in the list would be the
+ daemonized child after the original process died, no?
+ <vorlon> teythoon: that doesn't tell you how many times the "lead" process
+ has changed, however
+ <vorlon> you need synchronous notifications of the forks in order to know
+ that, which currently we only get via ptrace
+
+
+## IRC, OFTC, #debian-hurd, 2013-09-08
+
+ <teythoon> vorlon: ok, but why do the notifications have to be synchronous?
+ does that imply that the processes need to be stopped until upstart does
+ something?
+ <vorlon> teythoon: well, s/synchronous/reliable/
+ <vorlon> you're right that it doesn't need to be synchronous; but it can't
+ just be upstart polling the status of the cgroup
+ <vorlon> because processes may have come and gone in the meantime
+ <teythoon> vorlon: ok, cool, b/c the notifications of process changes I'm
+ hoping to introduce into the proc server for my cgroupfs do carry exactly
+ this kind of information
+ <vorlon> cool
+ <vorlon> are you discussing an API for this with the Linux cgroups
+ maintainers?
+ <teythoon> otoh it would be somewhat "interesting" to get upstart to use
+ this b/c of the way the mach message handling is usually implemented
+ <vorlon> :)
+ <teythoon> no, I meant in order for me to be able to implement cgroupfs I
+ had to create these kind of notifications, it's not an addition to the
+ cgroups api
+ <teythoon> is upstart multithreaded?
+ <vorlon> no
+ <vorlon> threads are evil ;)
+ <teythoon> ^^ I mostly agree
+ <vorlon> it uses a very nice event loop, leveraging signalfd among other
+ things
+ <teythoon> uh oh, signalfd sounds rather Linuxish
+ <pinotree> it is
+ <vorlon> I think xnox mentioned when he was investigating it that kfreebsd
+ now also supports it
+ <vorlon> but yeah, AFAIK it's not POSIX
+ <pinotree> it isn't, yes
+ <vorlon> but it darn well should be
+ <vorlon> :)
+ <vorlon> it's the best improvement to signal handling in a long time
+ <teythoon> systemd also uses signalfd
+ <teythoon> umm, it seems I was wrong about Hurd not having ptrace, the wiki
+ suggests that we do have it
+ <pinotree> FSVO "have"
+ <teythoon> ^^
+ <xnox> vorlon: teythoon: so ok kFreeBSD/FreeBSD ideally I'd be using
+ EVFILT_PROC from kevent which allows to receive events & track: exit,
+ fork, exec, track (follow across fork)
+ <xnox> upstart also uses waitid()
+ <xnox> so a ptrace/waitid should be sufficient to track processes, if Hurd
+ has them.
+
+
+## IRC, freenode, #hurd, 2013-09-09
+
+ <youpi> teythoon: yes, the shutdown notifications do stall the process
+ <youpi> but no more than a minute, or so
+ <youpi> teythoon: btw, did you end up understanding the odd thing in
+ fshelp_start_translator_long?
+ <youpi> I haven't had the time to have a look
+ <teythoon> youpi: what odd thing? the thing about being implemented by hand
+ instead of using the mig stub?
+ <youpi> the thing about the port being passed twice
+ <youpi> XXX this looks wrong to me, please have a look
+ <youpi> in the mach_port_request_notification call
+ <teythoon> ah, that was alright, yes
+ <youpi> ok
+ <youpi> so I can drop it from my TODO :)
+ <teythoon> this is done on the control port so that a translator is
+ notified if the "parent" translator dies
+ <teythoon> was that in fshelp_start_translator_long though? I thought that
+ was somewhere else
+ <youpi> that's what the patch file says
+ <youpi> +++ b/libfshelp/start-translator-long.c
+ <youpi> @@ -293,6 +293,7 @@ fshelp_start_translator_long (fshelp_open_fn_t
+ underlying_open_fn,
+ <youpi> + /* XXX this looks wrong to me, bootstrap is used twice as
+ argument... */
+ <youpi> bootstrap,
+ MACH_NOTIFY_NO_SENDERS, 0,
+ <teythoon> right
+ <teythoon> I remember that when I got a better grip of the idea of
+ notifications I figured that this was indeed okay
+ <teythoon> I'll have a quick look though
+ <youpi> ok
+ <teythoon> ah, I remember, this notifies the parent translator if the child
+ dies, right
+ <teythoon> and it is a NO_SENDERS notification, so it is perfectly valid to
+ use the same port twice, as we only hold a receive right
+
+
+## IRC, freenode, #hurd, 2013-09-10
+
+ <teythoon> braunr: are pthreads mapped 1:1 to mach threads?
+ <braunr> teythoon: yes
+ <teythoon> I'm reading the Linux cgroups "documentation" and it talks about
+ tasks (Linux threads) and thread group IDs (Linux processes) and I'm
+ wondering how to map this accurately onto Hurd concepts...
+ <teythoon> apparently on Linux there are PIDs/TIDs that can be used more or
+ less interchangeably from userspace applications
+ <teythoon> the Linux kernel however knows only PIDs, and each thread has
+ its own, and those threads belonging to the same (userspace) PID have the
+ same thread group id
+ <teythoon> aiui on Mach threads belong to a Mach task, and there is no
+ global unique identifier exposed for threads, right?
+ <teythoon> braunr: ^
+ <tschwinge> teythoon: There is its thread port, which in combination with
+ its task port should make it unique? (I might be missing context.)
+ <tschwinge> Eh, no. The task port's name will only locally be unique.
+ * tschwinge confused himself.
+ <teythoon> tschwinge, braunr: well, the proc server could of course create
+ TIDs for threads the same way it creates PIDs for tasks, but that should
+ probably wait until this is really needed
+ <teythoon> for the most part, the tasks and cgroup.procs files contain the
+ same information on Linux, and not differentiating between the two just
+ means that cgroupfs is not able to put threads into cgroups, just
+ processes
+ <teythoon> that might be enough for now
+
+
+## IRC, freenode, #hurd, 2013-09-11
+
+ <teythoon> ugh, some of the half-baked Linux interfaces will be a real
+ pain in the ass to support
+ <teythoon> they do stuff like write(2)ing file descriptors encoded as
+ decimal numbers for notifications :-/
+ <braunr> teythoon: for cgroup ?
+ <teythoon> braunr: yes, they have this eventfd based notification mechanism
+ <teythoon> braunr: but I fear that this is a more general problem
+ <braunr> do we need eventfd ?
+ <teythoon> I mean passing FDs around is okay, we can do this just fine with
+ ports too, but encoding numbers as an ascii string and passing that
+ around is just not a nice interface
+ <braunr> so what ?
+ <teythoon> it's not a designed interface, it's one people came up with b/c
+ it was easy to implement
+ <braunr> if it's meant for compatibility, that's ok
+ <teythoon> how would you implement this then? as a special case in the
+ write(2) implementation in the libc? that sounds horrible but I do hardly
+ see another way
+ <teythoon> ok, some more context: the cgroup documentation says
+ <teythoon> write "<event_fd> <control_fd> <args>" to cgroup.event_control.
+ <teythoon> where event_fd is the eventfd the notification should be sent to
+ <pinotree> theorically they could have used sendmsg + a custom payload
+ <teythoon> control_fd is an fd to the pseudo file one wants notifications
+ for
+ <teythoon> yes, they could have, that would have been nicer to implement
+ <teythoon> but this...
+
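+Concretely, the Linux cgroup-v1 notification interface being complained
+about works like this (Linux-specific by nature; the memory controller
+paths are the usual example, error handling omitted):
+
+    #include <stdio.h>
+    #include <string.h>
+    #include <stdint.h>
+    #include <fcntl.h>
+    #include <unistd.h>
+    #include <sys/eventfd.h>
+
+    int
+    watch_oom (const char *cgroup_dir)  /* e.g. "/sys/fs/cgroup/memory/foo" */
+    {
+      char path[256], line[64];
+
+      int efd = eventfd (0, 0);
+
+      snprintf (path, sizeof path, "%s/memory.oom_control", cgroup_dir);
+      int cfd = open (path, O_RDONLY);
+
+      snprintf (path, sizeof path, "%s/cgroup.event_control", cgroup_dir);
+      int ecfd = open (path, O_WRONLY);
+
+      /* "<event_fd> <control_fd> [args]" -- the ASCII-encoded descriptors
+         discussed above.  */
+      snprintf (line, sizeof line, "%d %d", efd, cfd);
+      write (ecfd, line, strlen (line));
+
+      uint64_t n;
+      read (efd, &n, sizeof n);   /* blocks until an OOM event fires */
+      return 0;
+    }
+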
+
+## IRC, freenode, #hurd, 2013-09-12
+
+ <teythoon> ugh, gnumachs build system drives me crazy %-/
+ <pinotree> oh there's worse than that
+ <teythoon> I added a new .defs file, did as Makerules.mig.am told me to do,
+ but it still does not create the stubs I need
+ <braunr> teythoon: gnumach doesn't
+ <braunr> teythoon: glibc does
+ <braunr> well, gnumach only creates the stubs it needs
+ <braunr> teythoon: you should perhaps simply use gnumach.defs
+ <teythoon> braunr: sure it does, e.g. vm/memory_object_default.user.c
+ <braunr> teythoon: what are you trying to add ?
+ <teythoon> braunr: I was trying to add a notification mechanism for new
+ tasks
+ <teythoon> b/c now the proc server has to query all task ports to discover
+ newly created tasks, this seems wasteful
+ <teythoon> also if the proc server could be notified on task creation, the
+ parent task is still around, so the notification can carry a reference to
+ it
+ <teythoon> that way gnumach wouldn't have to track the relationship, which
+ would create all kind of interesting questions, like whether tasks would
+ have to be reparented if the parent dies
+ <braunr> teythoon: notifications aren't that simple either
+ <teythoon> y not?
+ <braunr> 1/ who is permitted to receive them
+ <braunr> 2/ should we contain them to hurd systems ? (e.g. should a subhurd
+ receive notifications concerning tasks in other hurd systems ?)
+ <teythoon> that's easy imho. 1/ a single process that has a host_priv
+ handle is able to register for the notifications once
+ <braunr> what are the requirements so cgroups work as expected concerning
+ tasks ?
+ <braunr> teythoon: a single ?
+ <teythoon> i.e. the first proc server that starts
+ <braunr> then how will subhurd proc servers work ?
+ <teythoon> 2/ subhurds get the notifications from the first proc server,
+ and only those that are "for them"
+ <braunr> ok
+ <braunr> i tend to agree
+ <braunr> this removes the ability to debug the main hurd from a subhurd
+ <teythoon> this way the subhurd's proc server doesn't even have to have the
+ host_priv ports
+ <teythoon> yes, but I see that as a feature tbh
+ <braunr> me too
+ <braunr> and we can still debug the subhurd from the main
+ <teythoon> it still works the other way around, so it's still...
+ <teythoon> yes
+ <braunr> what would you include in the notification ?
+ <teythoon> a reference to the new task (proc needs that anyway) and one to
+ the parent task (so proc knows what the parent process is and/or for
+ which subhurd it is)
+ <braunr> ok
+ <braunr> 17:21 < braunr> what are the requirements so cgroups work as
+ expected concerning tasks ?
+ <braunr> IOW, why is the parental relation needed ?
+ <braunr> (i don't know much about the details of cgroup)
+ <teythoon> well, currently we rely on proc_child to build this relation
+ <teythoon> but any task can use task_create w/o proc_child
+ <teythoon> until one claims a newly created task with proc_child, its
+ parent is pid 1
+ <braunr> that's about the hurd
+ <braunr> i'm rather asking about cgroups
+ <teythoon> ah
+ <teythoon> the child process has to end up in the same cgroup as the parent
+ <braunr> does a cgroup include its own pid namespace ?
+ <teythoon> not quite sure what you mean, but I'd say no
+ <teythoon> do you mean pid namespace as in the Linux sense of that phrase?
+ <teythoon> cgroups group processes(threads) into groups
+ <teythoon> on Linux, you can then attach controllers to that, so that
+ e.g. scheduling decisions or resource restrictions can be applied to
+ groups
+ <teythoon> braunr: http://paste.debian.net/38950/
+ <braunr> teythoon: ok so a cgroup is merely a group of processes supervised
+ by a controller
+ <braunr> for resource accounting/scheudling
+ <braunr> teythoon: where does dev_pager.c do the same ?
+ <teythoon> braunr: yes. w/o such controllers cgroups can still be used for
+ subprocess tracking
+ <teythoon> braunr: well, dev_pager.c uses mig generated stubs from
+ memory_object_reply.defs
+ <braunr> ah memory_object_reply ok
+ <braunr> teythoon: have you tried adding it to EXTRA_DIST ?
+ <braunr> although i don't expect it will change much
+ <braunr> teythoon: hum, you're not actually creating client stubs
+ <braunr> create a kern/task_notify.cli file
+ <braunr> as it's done with device/memory_object_reply.cli
+ <braunr> see #define KERNEL_USER 1
+ <teythoon> braunr: right, thanks :)
+
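+Going just by braunr's hint, the missing piece would presumably be a
+`kern/task_notify.cli` along these lines, mirroring
+`device/memory_object_reply.cli` (the interface name is teythoon's work in
+progress and the exact contents of the existing `.cli` files may differ):
+
+    /* kern/task_notify.cli -- hypothetical; build user-side (client)
+       stubs for the kernel, as described above.  */
+    #define KERNEL_USER 1
+
+    #include <kern/task_notify.defs>
+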
+
+## IRC, freenode, #hurd, 2013-09-13
+
+ <teythoon> hm, my notification system for newly created tasks kinda works
+ <teythoon> as in I get notified when a new task is created
+ <teythoon> but the ports for the new task and the parent that are carried
+ in the notification are both MACH_PORT_DEAD
+ <teythoon> do I have to add a reference manually before sending it?
+ <teythoon> that would make sense, the mig magic transformation function for
+ task_t consumes a reference iirc
+ <braunr> ah yes
+ <braunr> that reference counting stuff is some hell
+ <teythoon> braunr: ah, there's more though, the mig transformations are
+ only done in the server stub, not in the client, so I still have to
+ convert_task_to_port myself afaics
+ <teythoon> awesome, it works :)
+ <braunr> :)
+ <teythoon> ugh, the proc_child stuff is embedded deep into libc and signal
+ handling stuff...
+ <teythoon> "improving" the child_proc stuff with my shiny new notifications
+ wreaks havoc on the system
+
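+What teythoon describes amounts to something like the following on the
+kernel side (a paraphrase of the chat, not his actual patch;
+`convert_task_to_port` and `task_reference` are existing gnumach internals,
+while the notification stub and port names are hypothetical):
+
+    /* Kernel-internal sketch: when the kernel is the *client* of the
+       notification interface, the MIG magic that consumes a task
+       reference does not apply, so take the references and convert the
+       tasks to ports by hand before sending.  */
+    void
+    notify_new_task (task_t new_task, task_t parent_task)
+    {
+      task_reference (new_task);        /* consumed by the conversion */
+      task_reference (parent_task);
+
+      ipc_port_t new_port = convert_task_to_port (new_task);
+      ipc_port_t parent_port = convert_task_to_port (parent_task);
+
+      /* Hypothetical user stub generated from task_notify.defs.  */
+      task_new_notification (task_notify_port, new_port, parent_port);
+    }
+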
+
# Required Interfaces
In the thread starting
diff --git a/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.mdwn b/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.mdwn
index cf41550d..7159551d 100644
--- a/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.mdwn
+++ b/open_issues/thread-cancel_c_55_hurd_thread_cancel_assertion___spin_lock_locked_ss_critical_section_lock.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2010, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -16,26 +16,37 @@ License|/fdl]]."]]"""]]
IRC, unknown channel, unknown date:
- <youpi> azeem, marcus: ext2fs.static: thread-cancel.c:55: hurd_thread_cancel: Assertion '! __spin_lock_locked (&ss->critical_section_lock)' failed
+ <youpi> azeem, marcus: ext2fs.static: thread-cancel.c:55:
+ hurd_thread_cancel: Assertion '! __spin_lock_locked
+ (&ss->critical_section_lock)' failed
<youpi> I actually don't understand this assertion
<youpi> it's just before __spin_lock (&ss->critical_section_lock);
<youpi> why should one check that a lock is free before taking it ?
<youpi> just the same in hurdexec.c
- <youpi> (no, ss is not our own sigstate, so it's not safe to assume no other path can take it)
+ <youpi> (no, ss is not our own sigstate, so it's not safe to assume no
+ other path can take it)
<youpi> there's another one in sysdeps/mach/hurd/spawni.c
<youpi> and jmp-unwind.c
- <antrik> youpi: why do you think it's nonsense?... the fact that we take the lock (so we can't be interrupted) doesn't mean we are willing to wait for others to release the lock... maybe the code path should never be reached while others have a lock, or something
+ <antrik> youpi: why do you think it's nonsense?... the fact that we take
+ the lock (so we can't be interrupted) doesn't mean we are willing to wait
+ for others to release the lock... maybe the code path should never be
+ reached while others have a lock, or something
<youpi> then it's useless to take the lock
- <youpi> "we take the lock (so we can't be interrupted)": no, it's not _our_ lock here, it's the lock of the thread we want to cancel
- <antrik> what exactly is cancelling a thread?... (sorry, I don't really have experience with thread programming)
+ <youpi> "we take the lock (so we can't be interrupted)": no, it's not _our_
+ lock here, it's the lock of the thread we want to cancel
+ <antrik> what exactly is cancelling a thread?... (sorry, I don't really
+ have experience with thread programming)
<youpi> ~= killing it
- <antrik> well, we take the lock so nobody can mess with the thread while we are cancelling it, no?...
+ <antrik> well, we take the lock so nobody can mess with the thread while we
+ are cancelling it, no?...
<youpi> yes
<youpi> that is fine
- <youpi> but checking that the lock is free before taking it doesn't make sense
+ <youpi> but checking that the lock is free before taking it doesn't make
+ sense
<youpi> why nobody should be able to take the lock ?
- <youpi> and if nobody is, why do we take it ? (since nobody would be able to take it)
- <antrik> well, maybe after taking the lock, we do some action that might result in others trying to take it...
+ <youpi> and if nobody is, why do we take it ? (since nobody would be able
+ to take it)
+ <antrik> well, maybe after taking the lock, we do some action that might
+ result in others trying to take it...
<youpi> nope: look at the code :)
<youpi> or maybe the cancel_hook, but I really doubt it
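+
+For reference, the code being discussed (`hurd_thread_cancel` in glibc's
+`hurd/thread-cancel.c`) boils down to the following, slightly
+simplified:
+
+    struct hurd_sigstate *ss = _hurd_thread_sigstate (thread);
+
+    /* The assertion that fires: check that nobody holds the target
+       thread's critical-section lock ...  */
+    assert (! __spin_lock_locked (&ss->critical_section_lock));
+    /* ... immediately before taking that very lock, which is what the
+       discussion above questions.  */
+    __spin_lock (&ss->critical_section_lock);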
-
diff --git a/open_issues/time.mdwn b/open_issues/time.mdwn
index becb88b0..367db872 100644
--- a/open_issues/time.mdwn
+++ b/open_issues/time.mdwn
@@ -11,6 +11,11 @@ License|/fdl]]."]]"""]]
[[!tag open_issue_porting]]
+[[!toc]]
+
+
+# `time`
+
Neither the `time` executable from the GNU time package works completely
correctly, nor does the GNU Bash built-in one.
@@ -56,20 +61,20 @@ As above; also here all the running time should be attributed to *user* time.
This is probably a [[!taglink open_issue_gnumach]].
-# 2011-09-02
+## 2011-09-02
Might want to revisit this, and take Xen [[!tag open_issue_xen]] into account
-- I believe flubber has already been Xenified at that time.
-## IRC, freenode, #hurd, 2011-09-02
+### IRC, freenode, #hurd, 2011-09-02
While testing some [[performance/IPC_virtual_copy]] performance issues:
<tschwinge> And I can confirm that with dd if=/dev/zero of=/dev/null bs=4k
running, a parallel sleep 10 takes about 20 s (on strauss).
-# 2013-03-30/31
+## 2013-03-30/31
Investigating time's `configure`, a difference of the output between Linux and
Hurd shows:
@@ -81,3 +86,754 @@ This causes a different code path in `resuse.c` to be used; such code path does
not get a define for `HZ`, which is then defined with a fallback value of 60.
[[!debbug 704283]] has been filed with a fix for this no-wait3 case.
+
+
+# `times`
+
+## guile
+
+### IRC, freenode, #hurd, 2013-08-21
+
+ <nalaginrut> does guile2 on hurd fixed? times issue
+ <teythoon> nalaginrut: does not look good
+ <teythoon> scheme@(guile-user)> (times)
+ <teythoon> $1 = #(0 0 0 0 0)
+ <nalaginrut> well, seems not a fixed version, if there's fixed version
+ <nalaginrut> since it's not Guile's bug, I can do nothing for it
+ <teythoon> ah
+ <nalaginrut> in spite of this, Guile2 works I think
+ <nalaginrut> all tests passed but 2 fail
+ <nalaginrut> one of the failure is version shows "UNKNOWN" which is
+ trivials
+ <teythoon> well, did you try to fix the times issue in Hurd?
+ <nalaginrut> I didn't , I have to get more familiar with hurd first
+ <nalaginrut> I'm playing hurd these days
+ <teythoon> :)
+ <nalaginrut> anyway, I think times issue is beyond my ability at present
+ <nalaginrut> ;-P
+ <teythoon> times is implemented in the glibc, in sysdeps/mach/hurd/times.c
+ <teythoon> don't say that before you had a look
+ <nalaginrut> yes, you're right
+ <nalaginrut> but I think times has something to do with the kernel time
+ mechanism, dunno if it's related to the issue
+ <nalaginrut> how did you get the times.c under hurd?
+ <nalaginrut> apt-get source glibc?
+ <teythoon> well, I'd clone git://sourceware.org/git/glibc.git
+ <teythoon> and yes, the kernel is involved
+ <teythoon> task_info is used to obtain the actual values
+ <teythoon>
+ http://www.gnu.org/software/hurd/gnumach-doc/Task-Information.html
+ <teythoon> I'd guess that something fails, but the times(2) interface is
+ not able to communicate the exact failure
+ <nalaginrut> maybe it's not proper to get src from upstream git? since it's
+ OK under Linux which uses it too
+ <nalaginrut> but apt-get source glibc has nothing
+ <teythoon> so I would copy the times(2) implementation from the libc so
+ that you can modify it and run it as a standalone program
+ <teythoon> well, the libc has system dependent stuff, times(2) on Linux is
+ different from the Hurd version
+ <teythoon> it has to be
+ <nalaginrut> alright, I got what you mean ;-)
+ <teythoon> and the debian libc is built from the eglibc sources, so the
+ source package is called eglibc iirc
+ <nalaginrut> ah~I'll try
+ <teythoon> have you tried to rpctrace your times test program? the small c
+ snippet you posted the other day?
+ <nalaginrut> I haven't build all the tools & debug environment on my hurd
+ ;-(
+ <teythoon> what tools?
+ <nalaginrut> well, I don't even have git on it, and I'm installing but
+ speed is slow, I'm looking for a new mirror
+ <teythoon> ah well, no need to do all this on the Hurd directly
+ <teythoon> building the libc takes like ages anyway
+ <nalaginrut> oops ;-)
+ <nalaginrut> I'll take your advice to concentrate on times.c only
+ <teythoon> oh well, it might be difficult after all, not sure though
+ <teythoon> times sends two task_info messages, once with TASK_BASIC_INFO,
+ once with TASK_THREAD_TIMES_INFO
+ <teythoon> here is the relevant rpctrace of your test program:
+ <teythoon> task131(pid14726)->task_info (1 10) = 0 {0 25 153427968 643072 0
+ 0 0 0 1377065590 570000}
+ <teythoon> task131(pid14726)->task_info (3 4) = 0 {0 0 0 10000}
+ <teythoon> ok, I don't know enough about that to be honest, but
+ TASK_THREAD_TIMES_INFO behaves funny
+ <teythoon> I put a sleep(1) into your test program, and if I rpctrace it,
+ it behaves differently o_O
+ * nalaginrut is reading task-information page to get what it could be
+ <nalaginrut> maybe I have to do the same steps under Linux to find some
+ clue
+ <teythoon> no, this is Mach specific, there is no such thing on Linux
+ <teythoon> on Linux, times(2) is a system call
+ <teythoon> on Hurd, times is a function implemented in the libc that
+ behaves roughly the same way
+ <nalaginrut> OK~so different
+ <teythoon> look at struct task_basic_info and struct task_thread_times_info
+ in the task-information page for the meaning of the values in the
+ rpctrace
+ <teythoon> yes, very
+ <braunr> nalaginrut: you may want to try a patch i did but which is still
+ waiting to be merged in glibc
+ <nalaginrut> braunr: ah~thanks for did it ;-)
+ <nalaginrut> can I have the link?
+ <braunr> i'm getting it
+ <braunr> teythoon: funny things happen with rpctrace, that's expected
+ <braunr> keep in mind rpctrace doesn't behave like ptrace at all
+ <braunr> it acts as a proxy
+ <braunr> nalaginrut:
+ http://git.savannah.gnu.org/cgit/hurd/glibc.git/commit/?h=rbraun/getclktck_100_hz&id=90404d6d1aa01f6ce1557841f5a675bb6a30f508
+ <braunr> nalaginrut: you need to add it to the debian eglibc patch list,
+ rebuild the packages, and install the resulting .debs
+ <braunr> if you have trouble doing it, i'll make packages when i have time
+ <nalaginrut> braunr: I think your test result is expected? ;-)
+ <braunr> what test result ?
+ <nalaginrut> times test under that patch
+ <braunr> yes
+ <braunr> but i have no idea if it will work
+ <braunr> my patch fixes a mismatch between glibc and the procfs server
+ <braunr> nothing more
+ <braunr> it may help, it may not, that's what i'd like to know
+ <nalaginrut> hah~thanks for that
+ <nalaginrut> I get source from apt-get, then manually modified the files,
+ no much code ;-)
+ <nalaginrut> compiling
+ <nalaginrut> there is no cpuinfo in /proc?
+ <teythoon> no
+ <nalaginrut> a feature need to be done? or there's another way for that?
+ <teythoon> well, it hasn't been implemented
+ <teythoon> do you need that? what for?
+ <nalaginrut> compiling error, I realized I should use gcc-4.7
+ <pinotree> how are you building?
+ <nalaginrut> I just happened to play proc while compiling, and found
+ there's no
+ <nalaginrut> cxa_finalize.c:48:1: error: ‘tcbhead_t’ has no member
+ named ‘multiple_threads’
+ <nalaginrut> I changed to gcc-4.7
+ <pinotree> just edit the sources, and then dpkg-buildpackage -nc -us -uc
+ <pinotree> that will rebuild the debian package as it would be in a debian
+ build, making sure all the build dependencies are there, etc
+ <pinotree> doing it different than that is just wrong™
+ <nalaginrut> ok, doing
+ <pinotree> were you really doing ./configure etc yourself?
+ <nalaginrut> well, I can't wait till it's done, I'll let it compile and
+ check it out tomorrow
+ <nalaginrut> I used configure, yes ;-P
+ <pinotree> not good
+ <nalaginrut> I have to go, thanks for help guys
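+
+As a standalone illustration of the explanation above that `times` is
+built on two `task_info` queries, the following minimal program (error
+handling omitted; not the glibc code) prints the same values that
+showed up in the rpctrace:
+
+    #include <stdio.h>
+    #include <mach.h>
+    #include <mach/task_info.h>
+
+    int
+    main (void)
+    {
+      struct task_basic_info bi;
+      struct task_thread_times_info tti;
+      mach_msg_type_number_t count;
+
+      /* Accumulated times of terminated threads.  */
+      count = TASK_BASIC_INFO_COUNT;
+      task_info (mach_task_self (), TASK_BASIC_INFO,
+                 (task_info_t) &bi, &count);
+
+      /* Times of the currently live threads.  */
+      count = TASK_THREAD_TIMES_INFO_COUNT;
+      task_info (mach_task_self (), TASK_THREAD_TIMES_INFO,
+                 (task_info_t) &tti, &count);
+
+      printf ("dead threads: user %d.%06ds  system %d.%06ds\n",
+              bi.user_time.seconds, bi.user_time.microseconds,
+              bi.system_time.seconds, bi.system_time.microseconds);
+      printf ("live threads: user %d.%06ds  system %d.%06ds\n",
+              tti.user_time.seconds, tti.user_time.microseconds,
+              tti.system_time.seconds, tti.system_time.microseconds);
+      return 0;
+    }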
+
+
+### IRC, freenode, #hurd, 2013-08-22
+
+ < nalaginrut> eglibc was done by dpkg-buildpackage, then how to install it?
+ (sorry I'm a brand new debian users)
+ < nalaginrut> oh~I found it
+ < nalaginrut> yes, (times) returns reasonable result ;-)
+ * nalaginrut is trying 'make check'
+ < nalaginrut> unfortunately, it can't pass the test though, I'm researching
+ it, anyway, we made first step
+ < nalaginrut> for Hurd internal-time-units-per-second will be 1000
+ < nalaginrut> , but the elapsed time is far larger than (* 2
+ internal-time-units-per-second)
+ < nalaginrut> I think the difference of two returned clocks after 1 second
+ should be the TIME_UNITS_PER_SECOND, in principle
+ < nalaginrut> but I'm not sure if it's an eglibc or a Guile bug
+ < nalaginrut> dunno, maybe clock tick should be 1000?
+ < nalaginrut> well, I'll try clock per second as 1000
+ < braunr> nalaginrut: clock tick (or actually, the obsolete notion of a
+ clock tick in userspace) should be 100
+ < braunr> nalaginrut: how did you come with 1000 ?
+ < nalaginrut> braunr: Guile set TIME_UNITS_PER_SECOND to 1000 when there's
+ no 8bytes size and doesn't define HAVE_CLOCK_GETTIME
+ < nalaginrut> #if SCM_SIZEOF_LONG >= 8 && defined HAVE_CLOCK_GETTIME
+ < nalaginrut> #define TIME_UNITS_PER_SECOND 1000000000
+ < nalaginrut> #else
+ < nalaginrut> #define TIME_UNITS_PER_SECOND 1000
+ < nalaginrut> #endif
+ < nalaginrut> and the test for 'times' used time-units-per-second
+ < pinotree> what has sizeof(long) have to do with time units per second?
+ < nalaginrut> dunno, maybe the representation of time?
+ < nalaginrut> the test failed since the difference between two clocks after
+ 1sec is too large
+ < nalaginrut> and for the test context, it should small than 2 times of
+ units-per-second
+ < nalaginrut> should be smaller
+ < nalaginrut> sorry for bad English
+ < pinotree> aren't you basically looking for clock_getres?
+ < nalaginrut> pinotree: I don't understand what you mean
+ < pinotree>
+ http://pubs.opengroup.org/onlinepubs/9699919799/functions/clock_getres.html
+ < nalaginrut> I wonder if there's a standard CLK_PER_SEC for Hurd
+ < nalaginrut> or it can be modified as wish
+ < pinotree> why do you need it?
+ < nalaginrut> the difference is 10,000,000, which can never be less than
+ 2*clock_per_second
+ < nalaginrut> pinotree: I don't need it, but I want to know if there's a
+ standard value
+ < braunr> nalaginrut: ok so, this is entirely a guile thing
+ < braunr> nalaginrut: did you test with my patch ?
+ < nalaginrut> braunr: yes, 'times' works fine
+ < braunr> but even with that, a tets fails ?
+ < braunr> test*
+ < nalaginrut> well, I can't say works fine, the proper description is "now
+ it has reasonable result"
+ < braunr> youpi: could you bring
+ http://darnassus.sceen.net/gitweb/savannah_mirror/glibc.git/commit/90404d6d1aa01f6ce1557841f5a675bb6a30f508
+ into debian glibc btw ?
+ < nalaginrut> braunr: it failed the test since the clock run too fast, but
+ it should be smaller than 2*clk-per-sec
+ < braunr> i don't get that
+ < braunr> can you show the code that checks the condition ?
+ < nalaginrut> braunr: http://pastebin.com/sG3QxnPt
+ < braunr> * 0.5 internal-time-units-per-second ?
+ < nalaginrut> for C users, it's just like
+ a=times(...);sleep(1);b=times(...); then time-units-per-sec/2 <= (b-a) <=
+ time-units-per-sec*2
+ < braunr> ah ok
+ < nalaginrut> the test passes when it's true
+ < braunr> so basically, it says sleep(1) sleeps for more than 2 seconds
+ < braunr> can you check the actual value ?
+ < braunr> b-a
+ < nalaginrut> hold on for minutes
+ < nalaginrut> it's 10,000,000
+ < nalaginrut> for clk-per-sec=1000,000,000, it's OK
+ < nalaginrut> but for 100 or 1000, it's too small
+ < braunr> let's forget 100
+ < braunr> guile uses 1000
+ < nalaginrut> OK
+ < braunr> but i still don't get why
+ < nalaginrut> so I asked if there's standard value, or it can be ajustified
+ < nalaginrut> adjusted
+ < braunr> ok so, times are expressed in clock ticks
+ < braunr> are you sure you're using a patched glibc ?
+ < nalaginrut> yes I used your patch, and the 'times' get reasonable result
+ < braunr> then
+ < braunr> 11:28 < nalaginrut> it's 10,000,000
+ < braunr> doesn't make sense
+ < nalaginrut> hmm
+ < braunr> and i don't understand the test
+ < braunr> what's tms:clock new ?
+ < nalaginrut> it's actually the return value of 'times'
+ < nalaginrut> Guile wrap the clock_t and tms to a vector, then we can get
+ all the thing in a row
+ < nalaginrut> 'new' is a variable which was gotten after 1 sec
+ < braunr> let's see what this does exactly
+ < nalaginrut> equal to "new = times(...)"
+ < nalaginrut> 'tms' equal to (clock_t (struct tms))
+ < nalaginrut> we have to pass in the struct pointer to get the struct
+ values filled, but for Guile we don't use pointer, times actually returns
+ two things: clock_t and struct tms
+ < nalaginrut> and Guile returns them as a vector in a row, that's it
+ < braunr> nalaginrut: test this please:
+ http://darnassus.sceen.net/~rbraun/test.c
+ < braunr> i don't have a patched libc here
+ < braunr> i'll build one right now
+ < nalaginrut> clock ticks: 1000000
+ < braunr> and this seems reasonable to you ?
+ < braunr> anyway, i think the guile test is bugged
+ < nalaginrut> no, the reasonable is not for this
+ < braunr> does it ever get the clock tick value from sysconf() ?
+ < nalaginrut> I say reasonable since it's always 0 both for clock and tms,
+ before apply your patch
+ < braunr> uh no
+ < braunr> i have the same value, without my patch
+ < nalaginrut> so I said "I can't say it works fine"
+ < braunr> either the test is wrong because it doesn't use sysconf()
+ < nalaginrut> anyway, I don't think times should return "all zero"
+ < braunr> or the clock values have already been converted
+ < braunr> but it doesn't
+ < braunr> you did something wrong
+ < nalaginrut> with your patch it doesn't
+ < braunr> without neither
+ < braunr> 11:43 < braunr> i have the same value, without my patch
+ < nalaginrut> well, it's too strange
+ < braunr> check how the test actually gets the clock values
+ < braunr> also, are your running in vbox ?
+ < braunr> you*
+ < nalaginrut> no, it's a physical machine
+ < braunr> oh
+ < braunr> nice
+ < braunr> note that vbox has timing issues
+ < nalaginrut> I thought I should give you some info of CPU, but there's no
+ /proc/cpuinfo
+ < braunr> shouldn't be needed
+ < nalaginrut> OK
+ < braunr> run my test again with an unpatched glibc
+ < braunr> just to make sure it produces the same result
+ < braunr> and
+ < nalaginrut> so the clock-per-sec is machine independent for Hurd I think
+ < braunr> 11:46 < braunr> check how the test actually gets the clock values
+ < nalaginrut> since it's implemented in userland
+ < braunr> clock-per-sec is always system dependent
+ < braunr> All times reported are in clock ticks.
+ < braunr> The number of clock ticks per second can be obtained
+ using:
+ < braunr> sysconf(_SC_CLK_TCK);
+ < braunr> 11:46 < braunr> check how the test actually gets the clock values
+ < braunr> to see if they're converted before reaching the test code or not
+ * nalaginrut is building eglibc
+ < braunr> building ?
+ < braunr> what for ?
+ < nalaginrut> I modified it to 1000, now it's useless
+ < braunr> we want it to 100 either way
+ < nalaginrut> and how to reinstall eglibc under debian?
+ < braunr> it's obsolete, procfs already uses 100, and 100 is low enough to
+ avoid overflows in practically all cases
+ < braunr> aptitude install libc0.3=<version>
+ < nalaginrut> OK
+ < braunr> aptitude show -v libc0.3
+ < braunr> for the list of available versions
+ < nalaginrut> out of topic, what's the meaning of the code in
+ quantize_timeval ?
+ < nalaginrut> tv->tv_usec = ((tv->tv_usec + (quantum - 1)) / quantum) *
+ quantum;
+ < nalaginrut> I can't understand this line
+ < braunr> scaling and rounding i guess
+ < nalaginrut> hmm...but quantum seems always set to 1?
+ < nalaginrut> 100/__getclktck()
+ < braunr> ah right
+ < braunr> old crap from the past
+ < nalaginrut> and clk-tck is 100
+ < braunr> the author probably anticipated clk_ticks could vary
+ < braunr> in practice it doesn't, and that's why it's been made obsolete
+ < nalaginrut> I wonder if it could be vary
+ < braunr> no
+ < nalaginrut> alright
+ < nalaginrut> why not just assign it to 1?
+ < braunr> 11:55 < braunr> old crap from the past
+ < braunr> the hurd is 20 years old
+ < braunr> like linux
+ < nalaginrut> oh~
+ < braunr> but with a lot less maintenance
+ < nalaginrut> braunr: well, I tried the original eglibc, your test was
+ clock ticks: 1000000
+ < nalaginrut> but in Guile, (times) ==> (0 0 0 0 0)
+ < nalaginrut> the reasonable result may be: #(4491527510000000 80000000 0 0
+ 0)
+ < braunr> 11:46 < braunr> check how the test actually gets the clock values
+ < braunr> ah, he left
+
+
+### IRC, freenode, #hurd, 2013-08-23
+
+ < braunr> nalaginrut: times() doesn't seem to be affected by my patch at
+ all
+ < nalaginrut> braunr: but it did in my machine
+ < nalaginrut> well, I think you mean it doesn't affect your C test code
+ < braunr> i'm almost sure something was wrong in your test
+ < braunr> keep using the official debian glibc package
+ < nalaginrut> I don't think it's test issue, since every time (times)
+ return zero, the test can never get correct result
+ < braunr> times doesn't return 0
+ < braunr> for sleep(1), i always have the right result, except in
+ microseconds
+ < nalaginrut> times in Guile always return #(0 0 0 0 0)
+ < braunr> (microseconds is the native mach time unit)
+ < braunr> well, guile does something wrong
+ < nalaginrut> after sleep 1, it's 0 again, so it's nonsense
+ < braunr> 11:46 < braunr> check how the test actually gets the clock values
+ < braunr> not on my system
+ < nalaginrut> but (times) returns reasonable result after applied your
+ patch
+ < braunr> that's not normal, since times isn't affected by my patch
+ < nalaginrut> oops
+ < braunr> you need to look for what happens in guile between the times()
+ call and the #(0 0 0 0 0) values
+ < nalaginrut> well, I tried many times between patch or non-patch, I think
+ there's no mistake
+ < nalaginrut> I read the 'times' code in Guile, there's nothing strange,
+ just call 'times' and put all the result to a vector
+ < braunr> which means there is no conversion
+ < braunr> in which case the test is plain wrong since there MUST also be a
+ call to sysconf()
+ < braunr> to obtain the right clock ticks value
+ < braunr> is your box reachable with ssh ?
+ < nalaginrut> oh~wait, seems there's a quotient operation, I'm checking
+ < nalaginrut> factor = scm_quotient (scm_from_long (TIME_UNITS_PER_SECOND),
+ < nalaginrut> scm_from_long (ticks_per_second));
+ < braunr> iirc, TIME_UNITS_PER_SECOND is hardcoded
+ < nalaginrut> unless factor is zero
+ < nalaginrut> yes, it's hardcoded
+ < braunr> that's completely non portable and wrong
+ < nalaginrut> you suggest to call sysconf?
+ < braunr> yes
+ < braunr> but i don't have the code in mind
+ < braunr> what is ticks_per_second ?
+ < nalaginrut> OK, that's one issue, we have to find why times return 0
+ < braunr> 14:14 < braunr> is your box reachable with ssh ?
+ < braunr> i'd like to make sure times returns 0 at your side
+ < braunr> because it doesn't at mine
+ < nalaginrut> no
+ < braunr> until i can reproduce, i can't consider there is a problem
+ < nalaginrut> I think it's unreachable for outer space
+ < nalaginrut> well, if you want to reproduce, just get guile src of debian
+ < braunr> guile 2.0 ?
+ < nalaginrut> yes, apt-get source guile-2.0
+ < nalaginrut> I'm checking ticks_per_second
+ < braunr> got the source, how do i test
+ < braunr> ?
+ < nalaginrut> you have to build it, and run ./meta/guile, then you don't
+ have to install it
+ < nalaginrut> and try (times)
+ < braunr> aw libgc
+ < nalaginrut> the reasonable result should be #(4313401920000000 110000000
+ 20000000 0 0) or something alike
+ < nalaginrut> but #(0 0 0 0 0) in each time is not reasonable apparently
+ < nalaginrut> maybe you need apt-get build-dep guile-2.0?
+ < braunr> already done
+ < nalaginrut> building Guile2 may take very long time
+ < nalaginrut> about 30 minutes in my old machine
+ < braunr> then it should take just a few minutes on mine
+ < nalaginrut> alright it's not very long, I've spent 8 hours for gcc in LFS
+ < braunr> 8 hours ?
+ < braunr> takes 5-10 minutes on a common machine ..
+ < nalaginrut> but it's Celeron566 at that time...
+ < braunr> ah, that old
+ < nalaginrut> include bootstrap, so very long
+ < braunr> nalaginrut: i got the test failure from the build procedure, how
+ do i run it manually ?
+ < nalaginrut> braunr: ./meta/guile -L test-suite
+ test-suite/tests/time.test
+ < nalaginrut> braunr: or make check for all
+ < braunr> put a print after the schedule() and before the return nil; in
+ runtime_mstart, since that's the body of new threads
+ < nlightnfotis> unfortunately, I can't confirm this with goroutines
+ running; the assertion failure aborts before I can get anything useful
+ < braunr> you can
+ < braunr> make sure there is a \n in the message, since stdout is line
+ buffered by default
+ < braunr> if you don't reach that code, it means threads don't exit
+ < braunr> at least goroutine threads
+ < braunr> btw, where is the main thread running ?
+ < nlightnfotis> I just checked there is a \n at the end.
+ < nlightnfotis> "<braunr> btw, where is the main thread running " could you
+ elaborate a little bit on this?
+ < braunr> what does main() after initializing the runtime ?
+ < braunr> +do
+ < nlightnfotis> the runtime main or the process's main?
+ < braunr> the process
+ < braunr> nlightnfotis: what we're interested in is knowing whether main()
+ exits or not
+ < nlightnfotis> braunr: I can see there are about 4 functions of interest:
+ runtime_main (the main goroutine, and I can imagine 1st thread)
+ < nlightnfotis> main_init (I don't know what it does, will check this out
+ now)
+ < nlightnfotis> main_main (not sure about this one either)
+ < nlightnfotis> and runtime_exit (0)
+ < braunr> i can see that too
+ < braunr> i'm asking about main()
+ < nlightnfotis> which seems to be the function that terminates the main
+ thread
+ < nlightnfotis> <braunr> nlightnfotis: what we're interested in is knowing
+ whether main() exits or not --> my theory is runtime_exit (0) exits the
+ process' main. Seeing as at various times go programs echo $? == 0.
+ < nlightnfotis> let me research that a little bit
+ < nlightnfotis> braunr: that will require a bit more studying. main_main()
+ and main_init() are both expanded to assembly tags if I understand it
+ correctly.
+ < nlightnfotis> main.main and __go_init_main respectively.
+ < braunr> why are you looking from there instead of looking from main() ?
+ < nlightnfotis> are we not looking out if main exits?
+ < braunr> we are
+ < braunr> so why look at main_main ?
+ < braunr> or anything else than main ?
+ < nlightnfotis> these are called inside runtime_main and I figured out they
+ might have a clue
+ < braunr> runtime_main != main
+ < braunr> (except if there is aliasing)
+ < nlightnfotis> there is still the possibility that runtime_main is the
+ main function and that runtime_exit(0) exits it.
+ < braunr> there is no doubt that main is main
+ < braunr> (almost)
+ < nlightnfotis> and I just found out that there is no main in assembly
+ produced from go. Only main.main
+ < braunr> check the elf headers for the entry point then
+ < nlightnfotis> braunr: I went through the headers, and found the process'
+ main. You can find it in <gcc_root>/libgo/runtime/go-main.c
+ < nlightnfotis> it seems very strange though: It creates a new thread, then
+ aborts?
+ < braunr> nlightnfotis: see :)
+ < braunr> nlightnfotis: add traces there
+ < nlightnfotis> braunr: can you look into that piece of code to check out
+ something I don't understand?
+ < nlightnfotis> braunr: I can not seem able to find __go_go 's definition
+ < nlightnfotis> only a declaration in runtime.h
+ < braunr>
+ https://github.com/NlightNFotis/gcc/blob/master/libgo/runtime/proc.c,
+ line 1552
+ < nlightnfotis> gee thanx. For a strange kind of fashion, I was looking for
+ it in runtime.c
+ < braunr> use git grep
+ < braunr> or tags/cscope
+ < nlightnfotis> braunr: yep! runtime_exit does seem to terminate a go
+ process that was not otherwise abnormally terminated.
+ < braunr> ?
+ < braunr> is it called or not ?
+ < braunr> runtime_exit is a macro on exit()
+ < braunr> so we already know what it does
+ < nlightnfotis> it is called
+ < braunr> ok
+ < braunr> that's not normal :)
+ < nlightnfotis> for a simple program
+ < braunr> uh ?
+ < nlightnfotis> for one that has a go routine
+ < braunr> but
+ < nlightnfotis> it doesn't
+ < nlightnfotis> it's expected
+ < braunr> ok
+ < braunr> that makes sense
+ < braunr> well, trace
+ < braunr> keep tracing
+ < braunr> for example in main()
+ < braunr> is runtime_mstart() actually reached ?
+ < nlightnfotis> yeah main and runtime_main were my next two targets
+ < braunr> good
+ < nlightnfotis> and now I followed your advice and it does compile much
+ faster
+ < braunr> so, it looks like the main thread just becomes a mere kernel
+ thread
+ < braunr> running runtime_mstart() and fetching goroutines as needed
+ < braunr> after your traces, i'd suggest running a small go test program,
+ with one simple goroutine (doesn't crash right ?)
+ < braunr> and trace context switching
+ < braunr> but after the traces
+ < braunr> one important trace is to understand why runtime_exit gets called
+ < nlightnfotis> it does crash even with 1 goroutine
+ < braunr> oh
+ < braunr> when doesn't it crash ?
+ < nlightnfotis> when it has 0 goroutines
+ < nlightnfotis> it works as expected
+ < nlightnfotis> but anything involving goroutines crashes
+ < nlightnfotis> and goroutines are very important; everything in the
+ standard library involves goroutines
+ < braunr> ok
+ < braunr> doesn't change what i suggested, good
+ < braunr> 1/ find out why runtime_exit gets called
+ < braunr> 2/ trace context switching with 1 goroutine
+ < nlightnfotis> on it.
+ < braunr> in all cases, make all your goroutines (including the main one)
+ *not* return
+ < braunr> so that you don't deal with goroutine destruction yet
+ < nlightnfotis> runtime_mstart in main doesn't to be run at all. So the
+ path is __go_go and then return from it.
+ < nlightnfotis> *doesn't seem
+
+
+### IRC, freenode, #hurd, 2013-08-26
+
+ < braunr> youpi: my glibc clock patch looks incomplete btw
+ < youpi> which one?
+ < youpi> ah, the ticks one?
+ < braunr> yes
+ < braunr> it doesn't change the values returned by times
+ < braunr> as a side effect, the load average bumps to 2+ on an idle machine
+
+
+### IRC, freenode, #hurd, 2013-08-27
+
+ < nalaginrut> braunr: have you tried Guile2 on your machine? ;-)
+ < braunr> nalaginrut: no
+ < braunr> nalaginrut: but i saw the code actually does use sysconf()
+ < nalaginrut> braunr: yes, for ticks_per_second
+ < braunr> i had to look myself to find it out, you didn't say it, despite
+ me asking multiple times
+ < braunr> it won't make debugging easier ;p
+ < braunr> nalaginrut: also, the return value of times is actually *never*
+ used
+ < braunr> i don't know why you've been talking about it so much
+ < nalaginrut> braunr: I'm sorry, it's first time to look stime.c for me
+ < braunr> the interesting function is get_internal_run_time_times()
+ < nalaginrut> what do you mean about "the return value of times is actually
+ *never* used"? in which context?
+ < braunr> see get_internal_run_time_times
+ < braunr> struct tms time_buffer;
+ < braunr> times(&time_buffer);
+ < braunr> return ...
+ < braunr> and yes, the user and system time reported in struct tms are 0
+ < braunr> let's see what posix has to say about it
+ < pinotree> it says it will return (clock_t)-1 for errors, but no standard
+ errors are defined yet
+ < nalaginrut> but I don't think get_internal_run_time_times has something
+ to do with scm_times
+ < braunr> well, i don't see any other call to times()
+ < braunr> i've asked you repeatedly to look for how guile fetches the data
+ < braunr> i think it's done in get_internal_run_time_times
+ < braunr> what makes you think otherwise ?
+ < braunr> our times() seems to behave fine, other than the units of the
+ return value
+ < nalaginrut> I don't understand what do you mean?
+ get_internal_run_time_times is unrelated to scm_times which is actually
+ "times" in Scheme code
+ < braunr> ok
+ < nalaginrut> I think we're talking about "times" activity, right?
+ < braunr> ok so result is a vector
+ < braunr> with the return value and the four values in struct tms
+ < nalaginrut> yes
+ < braunr> and what looks interesting is
+ < braunr> factor = scm_quotient (scm_from_long (TIME_UNITS_PER_SECOND),
+ scm_from_long (ticks_per_second));
+ < braunr> SCM_SIMPLE_VECTOR_SET (result, 0, scm_product (scm_from_long
+ (rv), factor));
+ < braunr> TIME_UNITS_PER_SECOND is 1000
+ < nalaginrut> yes, it means (clock_t *
+ (TIME_UNITS_PER_SECOND/ticks_per_second)), though I've no idea why it
+ does this
+ < braunr> normalizing values i guess
+ < nalaginrut> I wonder if the factor should be 1, just guessing
+ < braunr> let's see what our clock tick really is
+ < braunr> 1000000 on an unmodified libc
+ < braunr> 100 with my patch
+ < nalaginrut> so what's the problem?
+ < nalaginrut> all the values were multiplied by ticks, it's fair for the
+ subtraction
+ < nalaginrut> I think the problem is clock is too large for the difference
+ between utime and utime(sleep 1)
+ < nalaginrut> oops, is too small
+ < nalaginrut> sorry, I confused,
+ < nalaginrut> the problem is the difference of clock is too large for
+ 2*internal-time-units-per-second
+ < nalaginrut> and actually, internal-time-units-per-second is
+ SCM_TIME_UNITS_PER_SECOND
+ < nalaginrut> but without your patch, 'times' would return zeros all the
+ time, which is never meet the condition: SCM_TIME_UNITS_PER_SECOND/2 <=
+ (clock2 - clock1)
+ < nalaginrut> well, maybe your point is
+ TIME_UNITS_PER_SECOND/ticks_per_second is too small without your patch,
+ which causes the scm_to_long cast give a 0 value
+ < nalaginrut> s/cast/casting
+ < nalaginrut> when ticks_per_second is 100, the factor would be 10, which
+ seems to be reasonable
+ < nalaginrut> s/scm_to_long/scm_from_long
+ < nalaginrut> well, I have to checkout this
+ < nalaginrut> OK, let me restate the point: ticks_per_second is so
+ large that it makes the factor become zero
+ < nalaginrut> but decreasing ticks_per_second to 100 makes the clock
+ values much larger than TIME_UNITS_PER_SECOND
+ < braunr> 10:59 < nalaginrut> but without your patch, 'times' would return
+ zeros all the time, which is never meet the condition:
+ SCM_TIME_UNITS_PER_SECOND/2 <= (clock2 - clock1)
+ < braunr> until you prove me otherwise, this is plain wrong
+ < braunr> times() never returned me 0
+ < braunr> so let's see, this gives us a factor of 1000 / 1000000
+ < braunr> so the problem is factor being 0
+ < braunr> that's why *guile* times returns 0
+ < braunr> with my patch it should return 10
+ < nalaginrut> braunr: I'm sorry I mean "stime" in Scheme returns zeros
+ < nalaginrut> yes, I think the problem is factor
+ < nalaginrut> the factor
+ < braunr> now why doesn't my patch fix it all ?
+ < braunr> ah yes, rv is still in microseconds
+ < braunr> that's what i've been telling youpi recently, my patch is
+ incomplete
+ < braunr> i'll cook a quick fix, give me a few minutes please
+ < nalaginrut> but it fixed something ;-)
+ < braunr> well, guile makes a stupid assumption here
+ < braunr> so it's not really a fix
+ < nalaginrut> braunr: should I ask some info about TIME_UNITS_PER_SECOND
+ from Guile community?
+ < nalaginrut> or it doesn't help
+ < braunr> what do you want to ask them ?
+ < nalaginrut> since I don't know how this value was chosen
+ < nalaginrut> dunno, I'll ask if you need it
+ < nalaginrut> I just think maybe you need this info
+ < braunr> well
+ < braunr> my plan is to align the hurd on what other archs do
+ < braunr> i.e. set clk_tck to 100
+ < braunr> in which case this won't be a problem any more
+ < braunr> now you could warn them about the portability issue
+ < braunr> i'm not sure if they would care though
+ < nalaginrut> the warning is useful for the future
+ < nalaginrut> and it's not hard to make a change I think, for a constant,
+ but it depends on the maintainers
+ < braunr> it's not that simple
+ < braunr> time related things can easily overflow in the future
+ < nalaginrut> alright
+ < braunr> refer to the 2038 end-of-the-world bug
+ < nalaginrut> so how can I describe the warning/suggestion to them?
+ < braunr> i'm not sure
+ < braunr> tell them the TIME_UNITS_PER_SECOND isn't appropriate for larger
+ values of clk_tck
+ < braunr> dammit, microseconds are hardcoded everywhere in
+ sysdeps/mach/hurd ... >(
+ < braunr> nalaginrut: my new patch seems to fix the problem
+ < braunr> nalaginrut: i've built debian packages with which you can
+ directly test
+ < braunr> nalaginrut: deb http://ftp.sceen.net/debian-hurd-i386
+ experimental/
+ < braunr> Totals for this test run:
+ < braunr> passes: 38605
+ < braunr> failures: 0
+ < braunr> unexpected passes: 0
+ < braunr> expected failures: 7
+ < braunr> unresolved test cases: 578
+ < braunr> untested test cases: 1
+ < braunr> unsupported test cases: 10
+ < braunr> errors: 0
+ < braunr> PASS: check-guile
+ < braunr> =============
+ < braunr> 1 test passed
+ < braunr> =============
+ < braunr> :)
+ < braunr> youpi: the branch i added to glibc contains a working patch for
+ clock_t in centiseconds
+ < youpi> k
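+
+The problem found above can be reproduced in a few lines of C: guile
+normalizes the values returned by `times` with an integer factor of
+`TIME_UNITS_PER_SECOND / ticks_per_second`, which is 0 whenever
+`sysconf (_SC_CLK_TCK)` exceeds guile's hardcoded 1000:
+
+    #include <stdio.h>
+    #include <unistd.h>
+
+    #define TIME_UNITS_PER_SECOND 1000  /* hardcoded in guile */
+
+    int
+    main (void)
+    {
+      long ticks_per_second = sysconf (_SC_CLK_TCK);
+      long factor = TIME_UNITS_PER_SECOND / ticks_per_second;
+
+      /* Unpatched Hurd glibc: _SC_CLK_TCK = 1000000, so factor = 0 and
+         every value guile reports gets multiplied by 0.  With clock_t
+         in centiseconds: _SC_CLK_TCK = 100, factor = 10.  */
+      printf ("_SC_CLK_TCK = %ld, factor = %ld\n",
+              ticks_per_second, factor);
+      return 0;
+    }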
+
+
+### IRC, freenode, #hurd, 2013-08-28
+
+ <nalaginrut> braunr: well, looks great! I'll try it soon~
+ <nalaginrut> braunr: BTW, where is the patch/
+ <mark_weaver> braunr: what was needed to get guile working on the hurd?
+ <mark_weaver> well, if the fix wasn't to guile, I don't need the details.
+ <braunr> 04:53 < nalaginrut> braunr: BTW, where is the patch/
+ <braunr> there is hardly anyone here at 5am
+ <braunr> nalaginrut:
+ http://git.savannah.gnu.org/cgit/hurd/glibc.git/log/?h=rbraun/clock_t_centiseconds
+ <nalaginrut> braunr: thanks for that, but why not use a constant for 100?
+ <braunr> nalaginrut: i don't know where to define it
+ <braunr> it's glibc, you don't define new stuff mindlessly
+ <youpi> braunr: about your centiseconds patch, did you run the libc
+ testsuite with it?
+ <mark_weaver> it does seem a shame to reduce the resolution of the timers
+ from microseconds to centiseconds. I wonder if that could be avoided.
+ <youpi> by fixing all applications which assume centiseconds
+ <mark_weaver> *nod* well, if there's such a problem in Guile, I'd be glad
+ to fix that.
+ <braunr> youpi: no
+ <mark_weaver> I see that there's a macro CLOCKS_PER_SEC that programs
+ should consult.
+ <youpi> braunr: ok, I'll do then
+ <braunr> mark_weaver: why is it a shame ?
+ <braunr> it's not clock or timer resolution
+ <youpi> it's clock_t resolution
+ <braunr> it's an obsolete api to measure average cpu usage
+ <braunr> having such a big value on the other hand reduces the cpu usage
+ durations
+ <mark_weaver> braunr: good point :) I confess to being mostly ignorant of
+ these APIs.
+ <mark_weaver> Though Guile should still consult CLOCKS_PER_SEC instead of
+ assuming centiseconds. If it's making an improper assumption, I'd like
+ to know so I can fix it.
+ <braunr> the improper assumption is that there are less than 1000 clock
+ ticks per second
+ <mark_weaver> do you know off-hand of some code in Guile that is making
+ improper assumptions?
+ <braunr> yes
+ <braunr> let me find it
+ <mark_weaver> thanks
+ <braunr> factor = scm_quotient (scm_from_long (TIME_UNITS_PER_SECOND),
+ <braunr> scm_from_long (ticks_per_second));
+ <braunr> it seems guile attempts to normalize all times values to
+ TIME_UNITS_PER_SECOND
+ <braunr> while i think it would be better off using ticks_per_second (clock
+ ticks as provided by sysconf())
+ <braunr> attempting to normalize here causes factor to become 0 if
+ TIME_UNITS_PER_SECOND < ticks_per_second
+ <mark_weaver> ah, I see.
+ <mark_weaver> I'll take care of it. thanks for the pointer!
+ <youpi> braunr: I've commited the centisecond patch to debian's glibc
+
+
+### IRC, freenode, #hurd, 2013-08-29
+
+ <nalaginrut> braunr: Guile2 works smoothly now, let me try something cool
+ with it
+ <braunr> nalaginrut: nice
diff --git a/open_issues/tmux.mdwn b/open_issues/tmux.mdwn
new file mode 100644
index 00000000..f71d13e1
--- /dev/null
+++ b/open_issues/tmux.mdwn
@@ -0,0 +1,24 @@
+[[!meta copyright="Copyright © 2013 Free Software Foundation, Inc."]]
+
+[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
+id="license" text="Permission is granted to copy, distribute and/or modify this
+document under the terms of the GNU Free Documentation License, Version 1.2 or
+any later version published by the Free Software Foundation; with no Invariant
+Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
+is included in the section entitled [[GNU Free Documentation
+License|/fdl]]."]]"""]]
+
+[[!tag open_issue_porting]]
+
+# IRC, freenode, #hurd, 2013-08-01
+
+ <braunr> teythoon: can you stop tmux on darnassus please ?
+ <braunr> i'd like to check something
+ <teythoon> done
+ <braunr> tmux makes load average grow to 5 without any visible activity :/
+ <braunr> can't reproduce it with my instances though
+ <braunr> anyway, that's minor
+ <teythoon> I used tmux before and never encountered that
+ <teythoon> sometimes tmux would hang on attaching or detaching though, but
+ overall I had less problems with tmux than with screen
+ <teythoon> ah, I tried to start tmux on darnassus and now it hangs
diff --git a/open_issues/translate_fd_or_port_to_file_name.mdwn b/open_issues/translate_fd_or_port_to_file_name.mdwn
index fda4b811..98fe0cfc 100644
--- a/open_issues/translate_fd_or_port_to_file_name.mdwn
+++ b/open_issues/translate_fd_or_port_to_file_name.mdwn
@@ -105,6 +105,57 @@ License|/fdl]]."]]"""]]
<tschwinge> Ah, for /proc/*/maps, right. I've been thinking more globally.
+## task_get_name, task_set_name RPCs
+
+[[!message-id "518AA5B0.6030409@verizon.net"]]
+
+
+## IRC, freenode, #hurd, 2013-05-10
+
+ <youpi> tschwinge's suggestion to put names on ports instead of tasks would
+ be useful too
+ <braunr> do you get task ports as easily as you get tasks in kdb ?
+ <youpi> there is task->itk_self & such
+ <youpi> or itk_space
+ <youpi> I don't remember which one is used by userspace
+ <braunr> i mean
+ <braunr> when you use the debugger, can you easily find its ports ?
+ <braunr> the task ports i mean
+ <braunr> or thread ports or whatever
+ <youpi> once you have a task, it's a matter of getting the itk_self port
+ <youpi> s/port/field member/
+ <braunr> so the debugger provides you with the addresses of the structs
+ <braunr> right ?
+ <youpi> yes, that's what we have already
+ <braunr> then ok
+ <braunr> bddebian: do that :p
+ <braunr> hehe
+ <youpi> see show all thread
+ <braunr> (haven't used kdb in a long time)
+ <bddebian> So, adding a name to ports like I did with tasks?
+ <braunr> remove what you did for tasks
+ <braunr> move it to ports
+ <braunr> it's very similar
+ <braunr> but hm
+ <braunr> i'm not sure where the RPC would be
+ <braunr> this RPC would exist for *all* ports
+ <braunr> or only for kernel objects if added to gnumach.defs
+ <youpi> it's just about moving the char array field to another structure
+ <youpi> and plugging that
+ <bddebian> But mach_task_self is a syscall, it looks like itk_self is just a
+ pointer to an ipc_port ?
+ <braunr> so ?
+ <braunr> you take that pointer and you get the port
+ <braunr> just like vm_map gets a struct vm_map from a task
+ <bddebian> So I am just adding ipc_port_name to the ipc_port struct in this
+ case?
+ <braunr> yes
+ <braunr> actually
+ <braunr> don't do anything just yet
+ <braunr> we need to sort a few details out first
+ <braunr> see bug-hurd
+
+
# IRC, freenode, #hurd, 2011-07-13
A related issue:
@@ -134,4 +185,8 @@ A related issue:
Also see email thread starting at [[!message-id
"20110714082216.GA8335@sceen.net"]].
-Justus: Once [these patches](http://lists.gnu.org/archive/html/bug-hurd/2013-07/msg00262.html) are merged there is a way to map from ports to file names, at least for lib{disk,net}fs, one would only have to make this information available somehow.
+Justus: Once [[!message-id desc="these patches"
+"1375178364-19917-4-git-send-email-4winter@informatik.uni-hamburg.de"]] are
+merged, there will be a way to map from ports to file names, at least for
+libdiskfs and libnetfs, one would only have to make this information available
+somehow.
diff --git a/open_issues/translator_stdout_stderr.mdwn b/open_issues/translator_stdout_stderr.mdwn
index 14ea1c6d..89efd4e1 100644
--- a/open_issues/translator_stdout_stderr.mdwn
+++ b/open_issues/translator_stdout_stderr.mdwn
@@ -1,5 +1,5 @@
-[[!meta copyright="Copyright © 2008, 2009, 2010, 2011 Free Software Foundation,
-Inc."]]
+[[!meta copyright="Copyright © 2008, 2009, 2010, 2011, 2013 Free Software
+Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -9,25 +9,69 @@ Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license
is included in the section entitled [[GNU Free Documentation
License|/fdl]]."]]"""]]
-[[!tag open_issue_hurd]]
+[[!toc]]
-There have been several discussions and proposals already, about adding a
-suitable logging mechanism to translators, for example.
+# "Weird Issue"
-Decide / implement / fix that (all?) running (passive?) translators' output
-should show up on the (Mach / Hurd) console / syslog.
+## IRC, freenode, #hurd, 2013-07-01
+[[!tag open_issue_hurd]]
-[[!taglink open_issue_documentation]]: [[!message-id
-"87oepj1wql.fsf@becket.becket.net"]]
+ <teythoon> oh, btw, there is this weird issue that I cannot figure out
+ <teythoon> I noticed that there is no newline printed by /hurd/init after
+ printing " proc" and " auth"
+ <teythoon> but there *is* a printf("\n"); fflush(stdout); in there
+ <teythoon> it's just not working
+ <pinotree> iirc a newline is supposed to be printed after all the essential
+ servers have been started
+ <pinotree> that one
+ <teythoon> yes
+ <teythoon> but this doesn't happen
+ <teythoon> for whatever reason printf("foo"); yields no output
+ <braunr> how are proc and auth printed ?
+ <teythoon> neither does printf("%s", "foo");
+ <teythoon> using printf
+ <teythoon> but printf("%i fooo", 4); works
+ <youpi> uh
+ <braunr> ??
+ <youpi> and does printf("loooooooooong line") worker?
+ <teythoon> no
+ <youpi> uh
+ <youpi> -er
+ <teythoon> and yes, the code is always fflushing stdout
+ <youpi> perhaps trying to put a sleep(1); to check for timing issues?
+ <teythoon> yes, I suspect something like that
+ <teythoon> b/c *sometimes* my hurd just freezes at this point
+ <braunr> ???
+ <teythoon> after printing proc auth and not printing the newline
+ <braunr> such horror stories .
+ <braunr> ..
+ <teythoon> and I *think* that putting more printfs there for testing
+ purposes makes this worse, but I'm not sure about this
+ <braunr> in case you need to debug using printf, there is the mach_print
+ system call
+ <braunr> (in -dbg kernels)
+[[microkernel/mach/gnumach/interface/syscall/mach_print]].
-[[!taglink open_issue_documentation]]: Neal once had written an email on this
-topic.
+ <teythoon> what's wrong with using printf?
+ <braunr> you need to write the little assembly call yourself, where you
+ intend to use it, because it's not exported by glibc
+ <braunr> printf is an RPC
+ <braunr> teythoon: RPCs are complicated stuff
+ <braunr> in particular in core glibc parts like signal handling
+ <youpi> and printf goes through the console translator
+ <braunr> also, if you don't yet have a console server available, it comes
+ in handy
+ <youpi> better direct output directly to the kernel
-IRC, freenode, #hurd, 2011-11-06
+# `stderr` buffered
+
+## IRC, freenode, #hurd, 2011-11-06
+
+[[!tag open_issue_hurd]]
<youpi> about CLI_DEBUG, you can use #define CLI_DEBUG(fmt, ...) {
fprintf(stderr, fmt, ## __VA_ARGS__); fflush(stderr); }
@@ -40,7 +84,24 @@ IRC, freenode, #hurd, 2011-11-06
<tschwinge> That sounds wrong.
-IRC, freenode, #hurd, 2011-11-23
+# Logging
+
+[[!tag open_issue_hurd]]
+
+There have been several discussions and proposals already, about adding a
+suitable logging mechanism to translators, for example.
+
+Decide / implement / fix that (all?) running (passive?) translators' output
+should show up on the (Mach / Hurd) console / syslog.
+
+[[!taglink open_issue_documentation]]: [[!message-id
+"87oepj1wql.fsf@becket.becket.net"]]
+
+[[!taglink open_issue_documentation]]: Neal once had written an email on this
+topic.
+
+
+## IRC, freenode, #hurd, 2011-11-23
<braunr> we'd need a special logging task for hurd servers
<pinotree> if syslog would work, that could be used optionally
diff --git a/open_issues/user-space_device_drivers.mdwn b/open_issues/user-space_device_drivers.mdwn
index 8cde8281..be77f8e1 100644
--- a/open_issues/user-space_device_drivers.mdwn
+++ b/open_issues/user-space_device_drivers.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2009, 2011, 2012 Free Software Foundation,
+[[!meta copyright="Copyright © 2009, 2011, 2012, 2013 Free Software Foundation,
Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
@@ -154,12 +154,100 @@ A similar problem is described in
< braunr> s/disk/storage/
-### IRC, freenode, #hurd, 2012-04-25
+#### IRC, freenode, #hurd, 2012-04-25
<youpi> btw, remember the initrd thing?
<youpi> I just came across task.c in libstore/ :)
+#### IRC, freenode, #hurd, 2013-06-24
+
+ <youpi> we added a new initrd command to gnumach, to expose a new mach
+ device, which ext2fs can open and unzip
+ <youpi> we consider replacing that with simply putting the data in a dead
+ process
+ <youpi> s/process/task
+ <youpi> and let ext2fs read data from the task, and kill it when done
+ <teythoon> ok
+ <youpi> alternatively, tmps would work with an initial .tar.gz payload
+ <youpi> that would be best for memory usage
+ <youpi> tmpfs*
+ <teythoon> can't we replace the initrd concept with sub/neighbourhood?
+ <youpi> setting up tmpfs with an initial payload could be done with a
+ bootstrap subhurd
+ <teythoon> yes
+ <youpi> but it seems to me that having tmpfs being able to have an initial
+ payload is interesting
+ <teythoon> is there any advantage of the tmpfs translator prefilled with a
+ tarball over ext2fs with copy & bunzip?
+ <youpi> memory usage
+ <youpi> ext2fs with copy&bunzip takes memory for zeroes
+ <youpi> and we have to forecast how much data might be stored
+ <youpi> (if writable)
+ <teythoon> ah sure
+ <teythoon> but why would it have to be in the tmpfs translator? why not
+ start the translator and have tar extract stuff there?
+ <teythoon> with the livecd I had trouble replacing the root translator, but
+ when using subhurds that shouldn't be a problem at all
+ <youpi> I don't have a real opinion on this
+ <youpi> except that people don't usually like initrd :)
+ <braunr> 12:43 < teythoon> but why would it have to be in the tmpfs
+ translator? why not start the translator and have tar extract stuff
+ there?
+ <braunr> that sounds an awful lot like an initramfs
+ <teythoon> yes, exactly, without actually having an initramfs of course
+ <braunr> yep
+ <braunr> i actually prefer that way too
+ <teythoon> a system on a r/o isofs cannot do much, but it can do this
+ <braunr> on the other hand, i wouldn't spend much time on a virtio disk
+ driver for now
+ <braunr> the hurd as it is can't boot on a device that isn't managed by the
+ kernel
+ <braunr> we'd need to change the boot protocol
+
+
+#### IRC, freenode, #hurd, 2013-06-28
+
+ <teythoon> I'm tempted to redo a livecd, simpler and without the initrd
+ hack that youpi used for d-i
+ <braunr> initrd hack ?
+ <braunr> you mean more a la initramfs then ?
+ <teythoon> no, I thought about using a r/o isofs translator, but instead of
+ fixing that one up with a r/w overlay and lot's of firmlinks like I used
+ to, it would just start an ext2fs translator with copy on an image stored
+ on the iso and start a subhurd
+ <braunr> why a subhurd ?
+ <teythoon> neighbourhurd even
+ <teythoon> b/c back in the days I had trouble replacing /
+ <braunr> yes, that's hard
+ <teythoon> subhurd would take care of that for free
+ <braunr> are you sure ?
+ <teythoon> somewhat
+ <braunr> i'm not, but this requires thorough thinking
+ <braunr> and i'm not there yet
+ <teythoon> y would it not?
+ <teythoon> just start a subhurd and let that one take over the console and
+ let the user and d-i play nicely in that environment
+ <teythoon> no hacks involved
+ <braunr> because it would require sharing things between the two system
+ instances, and that's not easy
+ <teythoon> no but the bootstrap system does nothing after launching the
+ subhurd
+ <teythoon> I mean yes, technically true, but why would it be hard to share
+ with someone who does nothing?
+ <braunr> the context isn't well defined enough to clearly state anything
+ <braunr> if you don't use the resources of the first hurd, that's ok
+ <braunr> otherwise, it may be easy or not, i don't know yet
+ <teythoon> you think it's worth a shot and see what issues crop up?
+ <braunr> sure
+ <braunr> definitely
+ <teythoon> it doesn't sound complicated at all
+ <braunr> it's easy enough to the point we see something goes wrong or works
+ completely
+ <braunr> so worth testing
+ <teythoon> cool :)
+
+
### IRC, freenode, #hurd, 2012-07-17
<bddebian> OK, here is a stupid question I have always had. If you move
@@ -634,3 +722,8 @@ A similar problem is described in
* <http://ertos.nicta.com.au/research/drivers/uldd/>
* <http://gelato.unsw.edu.au/IA64wiki/UserLevelDrivers>
+
+ * [Running applications on the Xen
+ Hypervisor](http://blog.netbsd.org/tnf/entry/running_applications_on_the_xen),
+ Antti Kantee, 2013-09-17. [The Anykernel and Rump
+ Kernels](http://www.netbsd.org/docs/rump/).
diff --git a/open_issues/virtualization/fakeroot.mdwn b/open_issues/virtualization/fakeroot.mdwn
index f4739776..f9dd4756 100644
--- a/open_issues/virtualization/fakeroot.mdwn
+++ b/open_issues/virtualization/fakeroot.mdwn
@@ -22,3 +22,46 @@ License|/fdl]]."]]"""]]
<youpi> btw, I believe our fakeroot-hurd is close to working actually
<youpi> it's just a argv[0] issue supposed to be fixed by exec_file_name
but apparently not fixed in that case, for some reason
+
+
+## IRC, freenode, #hurd, 2013-08-26
+
+ < teythoon> also I looked into the fakeroot issue, aiui the problem is that
+ scripts are not handled correctly, right?
+ < teythoon> the exec server fails to locate the scripts file name, and so
+ it hands the file_t to the interpreter process and passes /dev/fds/3 as
+ script name
+ < teythoon> afaics that breaks e.g. python
+ < youpi> yes
+ < youpi> pinotree's exec_file_name is supposed to fix that, but for some
+ reason it doesn't work here
+ < pinotree> it was pochu's, not mine
+ < youpi> ah, right
+ < teythoon> ah I see, I was wondering about that
+ < pochu> it was working for a long time, wasn't it?
+ < pochu> and only stopped working recently
+ < youpi> did it completely stop?
+ < youpi> I have indeed seen odd issues
+ < youpi> I haven't actually checked whether it has completely stopped
+ working
+ < youpi> probably worth looking there first
+ < pinotree> gtk+3.0 fails, but other stuff like glib2.0 and gtester-using
+ stuff works
+ < teythoon> huh? I created tests like "#!/bin/sh\necho $0" and that says
+ /dev/fd..., and a python script doing the same doesn't even run, so how
+ can it work for a package build?
+ < youpi> it works for me in plain bash
+ < youpi> #!/bin/sh
+ < youpi> echo $0
+ < youpi> € $PWD/test.sh
+ < youpi> /home/samy/test.sh
+ < teythoon> it does !?
+ < youpi> yes
+ < youpi> not in fakeroot-hurd however, as we said
+ < teythoon> well, obviously it works when not being run under
+ fakeroot-hurd, yes
+ < youpi> ok, so we weren't talking about the same thing
+ < youpi> a mere shell script doesn't work in fakeroot-hurd indeed
+ < youpi> that's why we still use fakeroot-sysv
+ < teythoon> right
+ < youpi> err, -tcp
diff --git a/open_issues/virtualization/networking.mdwn b/open_issues/virtualization/networking.mdwn
index 7a6474a1..f8bda063 100644
--- a/open_issues/virtualization/networking.mdwn
+++ b/open_issues/virtualization/networking.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2010, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -28,3 +28,73 @@ Collection about stuff that is relevant for *virtualization* and *networking*.
[[hurd/translator/pfinet]] by setting environment variables.
Project is now part of [[Virtual_Square_View-OS]].
+
+
+# OpenVPN
+
+[[community/meetings/GHM2013]].
+
+
+## IRC, freenode, #hurd, 2013-08-23
+
+ <youpi> good news
+ <youpi> with a couple small patches, openvpn does work as joe user
+
+
+## IRC, freenode, #hurd, 2013-08-30
+
+ <youpi> it's really cool that openvpn ended up working completely the day
+ before :)
+
+
+## IRC, freenode, #hurd, 2013-09-03
+
+ <_d3f> Hey guys, how did you get openvpn working on the Hurd? just curious
+ as I saw it in the GHM video
+ <_d3f> no one here who has a clue how to get *vpn working on the Hurd?
+ <braunr> _d3f: youpi did it
+ <braunr> i don't know the details
+ <_d3f> okay, I will question him when I see him around, thx. Do you know if
+ it was a lot of work to get the tun device working? Because I would like
+ to use tinc on the Hurd.
+ <braunr> _d3f: a bit but not that much either
+ <_d3f> braunr: well, okay. Do you know if the source of his 'port' is
+ online, I haven't found it :/
+ <braunr> it should be soon
+
+
+## IRC, freenode, #hurd, 2013-09-04
+
+ <_d3f> youpi: you are the guy who has brought openvpn to the hurd, right? I
+ would like to know how you got the tun/tap thing working as I would like
+ to use tinc on it. :)
+ <youpi> _d3f: essentially no modification of openvpn iirc
+ <youpi> just tell it to open the tun node created by pfinet
+ <youpi> and read/write it
+ <youpi> i.e. the existing generic code in place in openvpn
+ <_d3f> I will have a look at it, somehow tinc builds with the
+ linux-specific device.c but I wasn't able to exchange keys. I will have a look
+ at the device handling again and try to get the pfinet tun node used.
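+
+A rough sketch of the generic approach youpi describes, for trying the
+same with tinc: open the tun node exposed by pfinet and read/write raw
+IP packets on it (the path `/dev/tun0` is an assumption, it depends on
+how pfinet was set up):
+
+    #include <stdio.h>
+    #include <fcntl.h>
+    #include <unistd.h>
+
+    int
+    main (void)
+    {
+      char packet[2048];
+      int fd = open ("/dev/tun0", O_RDWR);  /* node created by pfinet */
+      if (fd < 0)
+        {
+          perror ("open /dev/tun0");
+          return 1;
+        }
+
+      for (;;)
+        {
+          ssize_t n = read (fd, packet, sizeof packet);
+          if (n <= 0)
+            break;
+          printf ("read a %zd byte packet\n", n);
+          /* A VPN daemon would encrypt and forward the packet here, and
+             write () decrypted packets back to fd.  */
+        }
+      return 0;
+    }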
+
+
+## IRC, freenode, #hurd, 2013-09-07
+
+ <d3f> anyone here knows how /dev/net is handled on the hurd? Programs using
+ it say it's not a directory. I tried creating one and setting a netdde
+ translator for a tun device in it, but this may be wrong as it doesn't
+ work
+ <teythoon> d3f: what does /dev/net do?
+ <teythoon> ah, its tun/tap stuff...
+ <d3f> on my gnu/linux it includes a tun device
+ <teythoon> right
+ <d3f> I am still reading about the Hurd and trying to understand /hurd/netdde
+ and devnode, but by now I am quite sure I will need those to set a tun
+ network translator on /dev/net/tun?
+ <teythoon> hm, I don't think netdde or devnode will be of any help
+ <teythoon> afaiui devnode makes mach devices available in the hurdish way,
+ i.e. available for lookup in the filesystem
+ <teythoon> d3f: ping youpi if he shows up, he hacked up openvpn to work on
+ the hurd
+ <d3f> yeah I know, I talked to him as I am trying to get tinc working on the
+ Hurd (tinc builds by now). I will give him a shot about creating the
+ "tun" device
diff --git a/public_hurd_boxen.mdwn b/public_hurd_boxen.mdwn
index 80c5198a..b5ffcdc1 100644
--- a/public_hurd_boxen.mdwn
+++ b/public_hurd_boxen.mdwn
@@ -29,7 +29,7 @@ image|hurd/running/qemu]].
"[[bddebian]]","goober","Debian GNU/Hurd","?"
"[[bddebian]]","grubber","Debian GNU/Hurd","Celeron 2.2 GHz; 554 MiB","Xen domU on [[zenhost]]; for experimental stuff"
"[[bddebian]]","[[zenhost]]","Debian GNU/Linux","Celeron 2.2 GHz","Xen dom0 for several hosts"
-"[[sceen]]","darnassus","Debian GNU/Hurd","Xeon E5-1620 3.6 GHz, 1.8 GiB","KVM guest on dalaran; public Hurd box; web server"
+"[[sceen]]","darnassus","Debian GNU/Hurd","Xeon E5-1620 3.6 GHz, 1.8 GiB","KVM guest on dalaran; public Hurd box; [web server](http://darnassus.sceen.net/)"
"[[sceen]]","ironforge","Debian GNU/Hurd","Xeon E5-1620 3.6 GHz, 1.8 GiB","KVM guest on dalaran; Debian buildd"
"[[sceen]]","exodar","Debian GNU/Hurd","Xeon E5-1620 3.6 GHz, 1.8 GiB","KVM guest on dalaran; Debian porterbox, all Debian Developers have access"
"[[sceen]]","dalaran","Debian GNU/Linux","Xeon E5-1620 3.6 GHz, 64 GiB ECC","KVM host"
diff --git a/public_hurd_boxen/sceen.mdwn b/public_hurd_boxen/sceen.mdwn
index 25416857..b9188ffe 100644
--- a/public_hurd_boxen/sceen.mdwn
+++ b/public_hurd_boxen/sceen.mdwn
@@ -1,4 +1,4 @@
-[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]]
+[[!meta copyright="Copyright © 2011, 2013 Free Software Foundation, Inc."]]
[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
@@ -9,3 +9,11 @@ is included in the section entitled [[GNU Free Documentation
License|/fdl]]."]]"""]]
<http://www.sceen.net/>
+
+
+# IRC, freenode, #hurd, 2013-08-21
+
+ <braunr> i made all sceen.net VMs use hugetlbfs for their physical memory
+ <braunr> i suspect a system like the hurd, with such a huge working set for
+ just about every action compared to other systems, should visibly benefit
+ from that
diff --git a/system_call.mdwn b/system_call.mdwn
index f180a79b..16d706c7 100644
--- a/system_call.mdwn
+++ b/system_call.mdwn
@@ -18,3 +18,18 @@ See [[GNU Mach's system calls|microkernel/mach/gnumach/interface/syscall]].
In the [[GNU Hurd|hurd]], a lot of what is traditionally considered to be a UNIX
system call is implemented (primarily by means of [[RPC]]) inside [[glibc]].
+
+
+# IRC, freenode, #hurd, 2013-06-15
+
+ <braunr> true system calls are always implemented the same way, by the
+ kernel, using traps or specialized instructions that enable crossing from
+ user to kernel space
+ <braunr> glibc simply translates function calls to system calls by packing
+ arguments appropriately and using that trap or syscall instruction
+ <braunr> on microkernel based systems however, true system calls are
+ normally used only for IPC
+ <braunr> so we also use the term syscall to refer to those RPCs that
+ provide system services
+ <braunr> e.g. open() is a call to a file system server (and maybe several
+ actually)
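+
+On the Hurd this is visible from user space: glibc exports the port-level
+pieces that its open() is built from. A minimal sketch in C (illustrative,
+not from the log) using file_name_lookup() from <hurd.h>, showing that
+"opening" a file is really a lookup RPC to a filesystem server that hands
+back a Mach send right:
+
+    #include <errno.h>
+    #include <error.h>
+    #include <fcntl.h>
+    #include <hurd.h>
+    #include <mach.h>
+    #include <stdio.h>
+
+    int
+    main (void)
+    {
+      /* Performs the lookup RPCs to the filesystem server(s) responsible
+         for the path, much as open() does internally.  */
+      file_t port = file_name_lookup ("/etc/hostname", O_RDONLY, 0);
+      if (port == MACH_PORT_NULL)
+        error (1, errno, "file_name_lookup");
+
+      printf ("got send right %lu for /etc/hostname\n", (unsigned long) port);
+
+      /* open() would go on to wrap this port in a file descriptor; here we
+         simply drop the send right.  The only "true" system calls involved
+         are the Mach IPC primitives that carried the RPCs.  */
+      mach_port_deallocate (mach_task_self (), port);
+      return 0;
+    }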
diff --git a/toolchain/logs b/toolchain/logs
-Subproject 05905efce314e17e8f07c331e7f5520bde6b8a0
+Subproject c0775a78c9a55fd63c4d92e2baca1f4de6cbb5d