From 957cba89001d8d06e681ca493500209c5a151464 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Sat, 23 Jul 2011 08:50:54 +0200 Subject: IRC. --- .../io_system/clustered_page_faults.mdwn | 40 ++++++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) (limited to 'open_issues/performance') diff --git a/open_issues/performance/io_system/clustered_page_faults.mdwn b/open_issues/performance/io_system/clustered_page_faults.mdwn index 37433e06..9e20f8e1 100644 --- a/open_issues/performance/io_system/clustered_page_faults.mdwn +++ b/open_issues/performance/io_system/clustered_page_faults.mdwn @@ -12,7 +12,10 @@ License|/fdl]]."]]"""]] [[community/gsoc/project_ideas/disk_io_performance]]. -IRC, freenode, #hurd, 2011-02-16 +[[!toc]] + + +# IRC, freenode, #hurd, 2011-02-16 exceptfor the kernel, everything in an address space is represented with a VM object @@ -88,9 +91,8 @@ IRC, freenode, #hurd, 2011-02-16 recommend* ok ---- -IRC, freenode, #hurd, 2011-02-16 +# IRC, freenode, #hurd, 2011-02-16 etenil: OSF Mach does have clustered paging BTW; so that's one place to start looking... @@ -103,3 +105,35 @@ IRC, freenode, #hurd, 2011-02-16 can serve as a starting point + + +# IRC, freenode, #hurd, 2011-07-22 + + but concerning clustered pagins/outs, i'm not sure it's a mach + interface limitation + the external memory pager interface does allow multiple pages to + be transfered + isn't it an internal Mach VM problem ? + isn't it simply the page fault handler ? + braunr: are you sure? I was under the impression that changing the + pager interface was among the requirements... + hm... I wonder whether for pageins, it could actually be handled + in the pages instead of Mach... though this wouldn't work for pageouts, + so probably not very helpful + err... in the pagers + antrik: i'm almost sure + but i've be proven wrong many times, so .. 
+ there are two main facts that lead me to think this + 1/ + http://www.gnu.org/software/hurd/gnumach-doc/Memory-Objects-and-Data.html#Memory-Objects-and-Data + says lengths are provided and doesn't mention the limitation + 2/ when reading about UVM, one of the major improvements (between + 10 and 30% of global performance depending on the benchmarks) was + implementing the madvise semantics + and this didn't involve a new pager interface, but rather a new + page fault handler + braunr: hm... the interface indeed looks like it can handle + multiple pages in both directions... perhaps it was at the Hurd level + where the pager interface needs to be modified, not the Mach one?... + antrik: would be nice wouldn't it ? :) + antrik: more probably the page fault handler -- cgit v1.2.3 From 3e7472b3d54853389cd8a17475901fbef976ef18 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Thu, 1 Sep 2011 09:27:33 +0200 Subject: IRC. --- hurd/subhurd/discussion.mdwn | 69 ++++ hurd/translator/discussion.mdwn | 25 ++ hurd/translator/procfs/jkoenig/discussion.mdwn | 23 ++ microkernel/discussion.mdwn | 24 ++ microkernel/mach/memory_object/discussion.mdwn | 24 ++ news/2011-q2-ps.mdwn | 33 ++ .../active_vs_passive_symlink_translator.mdwn | 44 +++ open_issues/clock_gettime.mdwn | 28 +- open_issues/code_analysis.mdwn | 7 + open_issues/glibc_init_first.mdwn | 78 ++++ open_issues/gnumach_memory_management.mdwn | 397 +++++++++++++++++++++ open_issues/hurd_101.mdwn | 38 ++ open_issues/libpthread_dlopen.mdwn | 30 +- open_issues/mach_tasks_memory_usage.mdwn | 49 ++- open_issues/mmap_crash_etc.mdwn | 95 +++++ open_issues/multiprocessing.mdwn | 37 +- open_issues/packaging_libpthread.mdwn | 5 +- open_issues/performance.mdwn | 4 + open_issues/performance/degradation.mdwn | 28 ++ .../performance/io_system/binutils_ld_64ksec.mdwn | 15 + .../performance/microkernel_multi-server.mdwn | 47 +++ open_issues/proc_server_proc_exception_raise.mdwn | 37 ++ open_issues/resource_management_problems.mdwn | 15 
+ .../io_accounting.mdwn | 49 +++ open_issues/sa_siginfo_sa_sigaction.mdwn | 49 ++- open_issues/sbcl.mdwn | 31 ++ open_issues/sendmsg_scm_creds.mdwn | 4 + open_issues/syslog.mdwn | 44 ++- open_issues/tty_activitiy_vs_disk_io.mdwn | 81 +++++ open_issues/user-space_device_drivers.mdwn | 36 ++ open_issues/wine.mdwn | 50 ++- open_issues/wine/rg6dx09G.patch | 116 ++++++ 32 files changed, 1598 insertions(+), 14 deletions(-) create mode 100644 hurd/subhurd/discussion.mdwn create mode 100644 hurd/translator/discussion.mdwn create mode 100644 microkernel/discussion.mdwn create mode 100644 microkernel/mach/memory_object/discussion.mdwn create mode 100644 open_issues/active_vs_passive_symlink_translator.mdwn create mode 100644 open_issues/glibc_init_first.mdwn create mode 100644 open_issues/hurd_101.mdwn create mode 100644 open_issues/mmap_crash_etc.mdwn create mode 100644 open_issues/performance/degradation.mdwn create mode 100644 open_issues/performance/microkernel_multi-server.mdwn create mode 100644 open_issues/proc_server_proc_exception_raise.mdwn create mode 100644 open_issues/resource_management_problems/io_accounting.mdwn create mode 100644 open_issues/sbcl.mdwn create mode 100644 open_issues/tty_activitiy_vs_disk_io.mdwn create mode 100644 open_issues/wine/rg6dx09G.patch (limited to 'open_issues/performance') diff --git a/hurd/subhurd/discussion.mdwn b/hurd/subhurd/discussion.mdwn new file mode 100644 index 00000000..3449edcd --- /dev/null +++ b/hurd/subhurd/discussion.mdwn @@ -0,0 +1,69 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. 
A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_documentation]] + +IRC, freenode, #hurd, 2011-08-10 + + < braunr> youpi: aren't sub-hurds actually called "neighbor hurds" ? + < youpi> no idea + < braunr> i also don't understand the recursive property + < youpi> a user can run a subhurd + < neal> braunr: What don't you understand? + < youpi> a user in a subhurd can run a subhurd + < youpi> etc + < braunr> i'm not sure it's really recursive + < neal> youpi: At some point it was observed that you don't strictly + require any resources from the "parent" Hurd. + < neal> youpi: i.e., you could have two Hurds running "directly" on Mach + < youpi> sure + < neal> youpi: Hence neighbor rather than sub + < youpi> but you need to be root for that + < youpi> or else your subhurd can't do much + < neal> you need to have been authorized to use the required resouces + < youpi> which is about the same :) + < neal> depends how they are delegated + < youpi> that's still asking root for something + < neal> if you say so + < youpi> which is most probably not the default + < braunr> well, either you depend on the parent to do things on your + behalf, or you directly have some privileged ports + < braunr> i'd agree with youpi that it's pretty much having root access at + some point + < youpi> and usually you don't have privileged ports by default :) + < braunr> but we don't need to restrict the presentation to user only sub + hurds + < braunr> people don't mind switching to root on their desktops + < braunr> which is one of the reasons they ask "what does the hurd really + bring me today ?" 
+ < braunr> but being able to run truely separate hurds or recursive hurds is + something nice most OSes can't do easily + < youpi> switching to root becomes a *pain* when you have to do it 1 every + two commands + < braunr> yes sure, but some people might just say you're clumsy :x + < neal> The question is: can I start a sub-hurd from within another hurd + that survives the parent's hurd exiting? The answer is yes. The reason + is that the sub-hurd can be constructed in such a way that it does not + rely on the parent. In this case, the parent does not necessarily + subjugate the sub-hurd. Hence the name. + < braunr> but that's out of the scope of the discussion + < antrik> using the traditional, root only mechanism, neighbour-hurd is + indeed a more appropriate term. apart from the initial terminal being + proxied to the parent system by the boot program, they are really equal + < antrik> with zhengda's work on non-root subhurds, you rely on various + proxies in the parent system to access privileged resources; so subhurd + is indeed a more appropriate term in this case + < antrik> (not only non-root subhurds in fact... when using any of the + proxies, such as the network multiplexer -- even if still running as + root...) + < youpi> antrik: you could still give a com0 port as terminal + < antrik> I don't think that's actually supported in the boot + program... 
but it doesn't really matter, as you don't really need the + terminal anyways -- you can always log in through the network diff --git a/hurd/translator/discussion.mdwn b/hurd/translator/discussion.mdwn new file mode 100644 index 00000000..e038ba84 --- /dev/null +++ b/hurd/translator/discussion.mdwn @@ -0,0 +1,25 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_documentation open_issue_hurd]] + +IRC, freenode, #hurd, 2011-08-25: + + < frhodes> how can I replace an existing running server with a new one + without rebooting? + < antrik> frhodes: depends. if other critical things depend on it, you + can't. there is no mechanism to serialize and pass on the open sessions + < antrik> in some situations, you can orphan the old translator while + starting a new one, so the previous clients will stay with the old one + while new one will get the new one + < antrik> obviously that only works for things that aren't exclusive by + nature + < antrik> in some cases, you might even be able simply to remove the old + translator... 
but obviously only for non-critical stuff :-) diff --git a/hurd/translator/procfs/jkoenig/discussion.mdwn b/hurd/translator/procfs/jkoenig/discussion.mdwn index 64e3776e..01bbea42 100644 --- a/hurd/translator/procfs/jkoenig/discussion.mdwn +++ b/hurd/translator/procfs/jkoenig/discussion.mdwn @@ -184,3 +184,26 @@ IRC, freenode, #hurd, 2011-07-22 status is 644 though but status contains information which anyone can ask to the proc server anyway, I think. + + +# `/proc/mounts`, `/proc/$pid/mounts` + +IRC, freenode, #hurd, 2011-07-25 + + < pinotree> jkoenig: btw, what do you think about providing empty + /proc/mounts and /proc/$pid/mounts files? + < jkoenig> pinotree, I guess one would have to evaluate the consequences + wrt. existing use cases (in other words, "I have absolutely no clue + whatsoever about whether that would be desirable" :-) + < jkoenig> pinotree, the thing is, an error message like "/proc/mounts: No + such file or directory" is rather explicit, whereas errors which would be + caused by missing data in /proc/mounts would maybe be harder to track + < braunr> this seems reasonable though + < braunr> there already are many servers with e.g. grsecurity or chrooted + environments where mounts is empty + < pinotree> well, currently we also have an empty mtab + < braunr> pinotree: but what do you need that for ? + < braunr> pinotree: the init system ? 
+ < pinotree> and the mnt C api already returns no entries (or it bails out, + i don't remember) + < pinotree> not a strict need diff --git a/microkernel/discussion.mdwn b/microkernel/discussion.mdwn new file mode 100644 index 00000000..a5a73e18 --- /dev/null +++ b/microkernel/discussion.mdwn @@ -0,0 +1,24 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_documentation]] + +IRC, freenode, #hurd, 2011-07-26: + + < antrik> Tekk_`: regarding microkernels: the basic idea, and really the + *only* fundamental difference, is that they isolate things in separate + address spaces. everything else goes back to this. + < antrik> benefits from the isolation generally fall into two groups: more + robustness (main focus of Minix3), and more flexibility (main focus of + Hurd) + < antrik> while it might also encourage some other good design choices, + these are secondary effects: such choices can also be implemented in a + monolithic architecture -- and not necessarily harder. just less obvious + in some cases... 
diff --git a/microkernel/mach/memory_object/discussion.mdwn b/microkernel/mach/memory_object/discussion.mdwn new file mode 100644 index 00000000..a006429b --- /dev/null +++ b/microkernel/mach/memory_object/discussion.mdwn @@ -0,0 +1,24 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_documentation open_issue_gnumach]] + +IRC, freenode, #hurd, 2011-08-05 + + < neal> braunr: For instance, memory objects are great as they allow you to + specify the mapping policy in user space. + < neal> braunr: But, the policy for determining the eviction order is + realized by Mach + < neal> braunr: And user-space has no control + < braunr> are you referring to the page cache lru approximation and stuff + like resource containers ? + < neal> I'm not sure what you mean by page cache lru appoximateion + < braunr> the kernel eviction policy :) + < neal> that's an implementation detail diff --git a/news/2011-q2-ps.mdwn b/news/2011-q2-ps.mdwn index cbf039b0..14578e83 100644 --- a/news/2011-q2-ps.mdwn +++ b/news/2011-q2-ps.mdwn @@ -95,4 +95,37 @@ slashdot and phoronix did some [performance tests of the Hurd][phorperf], [phorperf]: http://www.phoronix.com/scan.php?page=article&item=debian_gnu_hurd&num=1 +--- + +IRC, freenode, #hurd, 2011-08-24: + + < ArneBab> hurd related: I now think you were right, antrik: the hurd + rumors don’t belong into the news (tschwinge) + < antrik> ArneBab: you mean the postscriptum as a whole, or just the wild + rumours part?... 
+ < ArneBab> the whole PS + < ArneBab> it should rather go into a blog post + < ArneBab> (in the wiki) + < antrik> hm... I don't think I agree + < ArneBab> why? + < antrik> apparently there is a number of people following the news now, + and apparently many of them misread some statements... it makes sense to + use the same channel for clarifying them I'd say + < ArneBab> hm, ok + < ArneBab> how would you select the part to include? + < antrik> roughly speaking, I'd include everything that actually relates to + the previous news that were misunderstood + < antrik> and drop all unrelated speculations that popped up + < antrik> BTW, it *might* be useful perhaps to actually update the original + news posting with the clarifications?... + < ArneBab> we can’t do that without breaking some peoples RSS feeds + < antrik> note that there is another aspect to consider: the fact that + several news sites picked it up is indeed genuine news by itself... + < ArneBab> that’s right, yes + < antrik> will it really break anything? from what I heard so far it just + means they will see the posting as new again, which would actually make + sense in this case... + < antrik> but I don't insist if you think it's too risky :-) + < antrik> just an idea + --> diff --git a/open_issues/active_vs_passive_symlink_translator.mdwn b/open_issues/active_vs_passive_symlink_translator.mdwn new file mode 100644 index 00000000..cbd9b077 --- /dev/null +++ b/open_issues/active_vs_passive_symlink_translator.mdwn @@ -0,0 +1,44 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. 
A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_documentation open_issue_hurd]] + +IRC, freenode, #hurd, 2011-07-25 + +Set an *active* (not *passive*) `/hurd/symlink` translator on a node. + + < antrik> that's strange: the file doesn't look like a symlink in ls output + -- but it behaves like one... + < antrik> using firmlink instead of symlink yields less confusing + results... + < gg0> how does it behaves like one? + < antrik> perhaps the symlink mechanism only fully works for a passive + symlink translator, not an active one + < antrik> gg0: if you access it, you actually get the linked file contents + < antrik> it's only ls that's confused + < antrik> it might be because ls -l uses O_NOFOLLOW, which results in + O_NOTRANS, so it sees the original file contents + < gg0> stat says it's still 12264 bytes + < antrik> stat also seems to use NOFOLLOW + < antrik> wc will show the "correct" size + < gg0> ok + < antrik> if you set it as passive translator, it works as expected... but + then you better don't forget removing it, as it won't go away after a + reboot :-) + < antrik> but as I said, you can just ignore the weirdness -- or use + firmlink instead + < antrik> the thing is, if symlink is set as a passive translator, the + filesystem handles it specially, so it really looks like a symlink to + programs using NOFOLLOW. that's not the case with an active symlink... so + programs using NOFOLLOW simply do not see the active symlink at all + < antrik> firmlink OTOH ignores NOFOLLOW, so you always see the linked-to + file + + * [[hurd/translator/short-circuiting]] diff --git a/open_issues/clock_gettime.mdwn b/open_issues/clock_gettime.mdwn index bba0d171..c06edc9b 100644 --- a/open_issues/clock_gettime.mdwn +++ b/open_issues/clock_gettime.mdwn @@ -12,8 +12,30 @@ License|/fdl]]."]]"""]] [[!tag open_issue_glibc open_issue_gnumach]] -Missing clock_gettime(CLOCK_MONOTONIC) (e.g. 
for iceweasel) +Missing `clock_gettime(CLOCK_MONOTONIC)` (e.g. for iceweasel) -It could be a mere matter of extending the mappable clock: add it to mapped_time_value_t in gnumach, handle it in gnumach/kern/mach_clock.c, and make clock_gettime use it. +It could be a mere matter of extending the mappable clock: add it to +`mapped_time_value_t` in gnumach, handle it in `gnumach/kern/mach_clock.c`, and +make `clock_gettime` use it. -BTW, also make gettimeofday() use it, since it's way more efficient and some applications assume that it is. +BTW, also make `gettimeofday()` use it, since it's way more efficient and some +applications assume that it is. + +What about adding a nanosecond-precision clock, too? --[[tschwinge]] + +IRC, freenode, #hurd, 2011-08-26: + + < pinotree> youpi: thing is: apparently i found a simple way to have a + monotonic clock as mmap-able device inside gnumach + < pinotree> currently, in kern/mach_clock.c there's a variable 'time', + which gets increased on clock interrupt, and optionally modified by + host_set_time + < pinotree> () + < pinotree> if i add a new variable next to it, only increasing it on + interrupt but not modifying it at all otherwise, would that give me a + monotonic clock? 
+ < pinotree> at least on sme basic tests i did, it seems it could work that + way + < youpi> yes, it should work + < braunr> sure + < youpi> and that's the way I was considering implementing it diff --git a/open_issues/code_analysis.mdwn b/open_issues/code_analysis.mdwn index ab90a6b6..552cd2c9 100644 --- a/open_issues/code_analysis.mdwn +++ b/open_issues/code_analysis.mdwn @@ -27,6 +27,13 @@ analysis|performance]], [[formal_verification]], as well as general * [[!wikipedia List_of_tools_for_static_code_analysis]] + * [Cppcheck](http://sourceforge.net/apps/mediawiki/cppcheck/) + + For example, [Debian's hurd_20110319-2 + package](http://qa.debian.org/daca/cppcheck/sid/hurd_20110319-2.html) + (Samuel Thibault, 2011-08-05: *I had a look at those, some are spurious; + the realloc issues are for real*). + * Coccinelle * diff --git a/open_issues/glibc_init_first.mdwn b/open_issues/glibc_init_first.mdwn new file mode 100644 index 00000000..774b7828 --- /dev/null +++ b/open_issues/glibc_init_first.mdwn @@ -0,0 +1,78 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. 
A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_glibc]] + +IRC, freenode, #hurd, 2011-07-22 + + [additional init-first.c patch] + < tschwinge> civodul: The only thing I wonder about: Roland also once had + done similar changes, which I then found they didn'T work with GCC 4.1, + and backed them out in 08f53ee9d265ffdc7e0affd6acf346cceeb65559 and fixed + the issue differently in d8d27e633a7860b37fd2e3142822b640a066cc0f (and + e17cef66140d4c97710ea88bd8d12612799e1e0f). Have you reviewed this? + < tschwinge> That's in the Savannah glibc repository. + < tschwinge> And this has been in 2007, four years ago. I don't remember + all the details. + < tschwinge> And here is quite a good summary of this stuff, from + init-first.c: + < tschwinge> /* XXX This is all a crock and I am not happy with it. + < tschwinge> This poorly-named function is called by static-start.S, + < civodul> braunr: thanks; i must admit it took me a while to figure it out + ;-) + < tschwinge> which should not exist at all. */ + < tschwinge> civodul: I can imagine... :-/ + < civodul> tschwinge: re Roland's changes, that's weird; i plan to try to + reinstate his change and see if it works + < civodul> now, i won't test with GCC 4.1... + < tschwinge> Yeah... + < tschwinge> I'm happy if it works with 4.4 onwards. + < tschwinge> civodul: And it's safe (in GCC terms) to write to ``* ((void + **) __builtin_frame_address (0) + 1)'', and similar? + < tschwinge> Or should we be coding this few stuff in assembly? + < civodul> tschwinge: well, we should add a compile-time assertion for + __builtin_return_address (0) == *((void**)__builtin_frame_address (0) + + 1) + < civodul> (i think GCC can figure it out at compile-time) + < civodul> but on IA32 it should always be true + < civodul> what's the name of glibc's compile-time assert macro already? + < tschwinge> I wonder whether that might interfere with some of GCC's + optimizations? 
+ < civodul> what? + < tschwinge> Well, it seems unclean for me to be modifying a function's + return address from within C code. + < tschwinge> civodul: I added a verify.h in the t/verify.h branch. But + people didn't really like it too much. They rather wanted to directly + inline the array[(cond)?1:-1] code. + < civodul> ok + < civodul> i remember a debate about Gnulib's verify.h + < civodul> i thought something comparable had landed eventually + < tschwinge> civodul: Oh, maybe I missed it. + < tschwinge> civodul: In init-first.c:init, what about the usage of + data[-1] in the else path (not using cthreads) -- is that good as-is? + < civodul> tschwinge: oooh, it probably needs to fixed too + < civodul> but i haven't reached that point yet ;-) + * civodul tries to cross-bootstrap GNU from scratch + < tschwinge> civodul: I'd be happy to learn what was wrong with Roland's + original idea of fixing this. Or perhaps this was a GCC 4.1 bug? Or + perhaps GCC was inlining to much, and then got confused with frames and + return addresses? + < civodul> tschwinge: Roland's change looks good to me, so it could have + been a GCC bug + < civodul> tschwinge: OK to commit the patch to t/init-first.c (with both + data[-1] replaced)? + < tschwinge> civodul: OK, if you are confident that it works with GCC 4.4 + onwards. If yes, please add your changelog snippet to .topmsg, and also + add a not that Roland's original code may in fact have been fine, and we + may have hit a compiler bug. + < civodul> tschwinge: OK, will do + < civodul> tschwinge: though regarding Roland's change, i'd prefer to + actually test and see + < tschwinge> civodul: Thanks! diff --git a/open_issues/gnumach_memory_management.mdwn b/open_issues/gnumach_memory_management.mdwn index 448aafcc..a728fc9d 100644 --- a/open_issues/gnumach_memory_management.mdwn +++ b/open_issues/gnumach_memory_management.mdwn @@ -923,3 +923,400 @@ There is a [[!FF_project 266]][[!tag bounty]] on this task. 
20 years ago but it's a source of deadlock Indeed. I'll won't use kmem_alloc_pageable. + + +# IRC, freenode, #hurd, 2011-08-09 + + < braunr> mcsim: what's the "bug related to MEM_CF_VERIFY" you refer to in + one of your commits ? + < braunr> mcsim: don't use spin_lock_t as a member of another structure + < mcsim> braunr: I confused with types in *_verify functions, so they + didn't work. Than I fixed it in the commit you mentioned. + < braunr> in gnumach, most types are actually structure pointers + < braunr> use simple_lock_data_t + < braunr> mcsim: ok + < mcsim> > use simple_lock_data_t + < mcsim> braunr: ok + < braunr> mcsim: don't make too many changes to the code base, and if + you're unsure, don't hesitate to ask + < braunr> also, i really insist you rename the allocator, as done in x15 + for example + (http://git.sceen.net/rbraun/x15mach.git/?a=blob;f=vm/kmem.c), instead of + a name based on mine :/ + < mcsim> braunr: Ok. It was just work name. When I finish I'll rename the + allocator. + < braunr> other than that, it's nice to see progress + < braunr> although again, it would be better with some reports along + < braunr> i won't be present at the meeting tomorrow unfortunately, but you + should use those to report the status of your work + < mcsim> braunr: You've said that I have to tweak gc process. Did you mean + to call mem_gc() when physical memory ends instead of calling it every x + seconds? Or something else? 
+ < braunr> there are multiple topics, alhtough only one that really matters + < braunr> study how zone_gc was called + < braunr> reclaiming memory should happen when there is pressure on the VM + subsystem + < braunr> but it shouldn't happen too ofte, otherwise there is trashing + < braunr> and your caches become mostly useless + < braunr> the original slab allocator uses a 15-second period after a + reclaim during which reclaiming has no effect + < braunr> this allows having a somehow stable working set for this duration + < braunr> the linux slab allocator uses 5 seconds, but has a more + complicated reclaiming mechanism + < braunr> it releases memory gradually, and from reclaimable caches only + (dentry for example) + < braunr> for x15 i intend to implement the original 15 second interval and + then perform full reclaims + < mcsim> In zalloc mem_gc is called by vm_pageout_scan, but not often than + once a second. + < mcsim> In balloc I've changed interval to once in 15 seconds. + < braunr> don't use the code as it is + < braunr> the version you've based your work on was meant for userspace + < braunr> where there isn't memory pressure + < braunr> so a timer is used to trigger reclaims at regular intervals + < braunr> it's different in a kernel + < braunr> mcsim: where did you see vm_pageout_scan call the zone gc once a + second ? + < mcsim> vm_pageout_scan calls consider_zone_gc and consider_zone_gc checks + if second is passed. + < braunr> where ? + < mcsim> Than zone_gc can be called. + < braunr> ah ok, it's in zaclloc.c then + < braunr> zalloc.c + < braunr> yes this function is fine + < mcsim> so old gc didn't consider vm pressure. Or I missed something. + < braunr> it did + < mcsim> how? + < braunr> well, it's called by the pageout daemon + < braunr> under memory pressure + < braunr> so it's fine + < mcsim> so if mem_gc is called by pageout daemon is it fine? 
+ < braunr> it must be changed to do something similar to what + consider_zone_gc does + < mcsim> It does. mem_gc does the same work as consider_zone_gc and + zone_gc. + < braunr> good + < mcsim> so gc process is fine? + < braunr> should be + < braunr> i see mem.c only includes mem.h, which then includes other + headers + < braunr> don't do that + < braunr> always include all the headers you need where you need them + < braunr> if you need avltree.h in both mem.c and mem.h, include it in both + files + < braunr> and by the way, i recommend you use the red black tree instead of + the avl type + < braunr> (it's the same interface so it shouldn't take long) + < mcsim> As to report. If you won't be present at the meeting, I can tell + you what I have to do now. + < braunr> sure + < braunr> in addition, use GPLv2 as the license, teh BSD one is meant for + the userspace version only + < braunr> GPLv2+ actually + < braunr> hm you don't need list.c + < braunr> it would only add dead code + < braunr> "Zone for dynamical allocator", don't mix terms + < braunr> this comment refers to a vm_map, so call it a map + < mcsim> 1. Change constructor for kentry_alloc_cache. + < mcsim> 2. Make measurements. + < mcsim> + + < mcsim> 3. Use simple_lock_data_t + < mcsim> 4. Replace license + < braunr> kentry_alloc_cache <= what is that ? + < braunr> cache for kernel map entries in vm_map ? + < braunr> the comment for mem_cpu_pool_get doesn't apply in gnumach, as + there is no kernel preemption + < braunr> "Don't attempt mem GC more frequently than hz/MEM_GC_INTERVAL + times a second. + < braunr> " + < mcsim> sorry. I meant vm_map_kentry_cache + < braunr> hm nothing actually about this comment + < braunr> mcsim: ok + < braunr> yes kernel map entries need special handling + < braunr> i don't know how it's done in gnumach though + < braunr> static preallocation ? 
+ < mcsim> yes + < braunr> that's ugly :p + < mcsim> but it uses dynamic allocation further even for vm_map kernel + entries + < braunr> although such bootstrapping issues are generally difficult to + solve elegantly + < braunr> ah + < mcsim> now I use only static allocation, but I'll add dynamic allocation + too + < braunr> when you have time, mind the coding style (convert everything to + gnumach style, which mostly implies using tabs instead of 4-spaces + indentation) + < braunr> when you'll work on dynamic allocation for the kernel map + entries, you may want to review how it's done in x15 + < braunr> the mem_source type was originally intended for that purpose, but + has slightly changed once the allocator was adapted to work in my kernel + < mcsim> ok + < braunr> vm_map_kentry_zone is the only zone created with ZONE_FIXED + < braunr> and it is zcram()'ed immediately after + < braunr> so you can consider it a statically allocated zone + < braunr> in x15 i use another strategy: there is a special kernel submap + named kentry_map which contains only one map entry (statically allocated) + < braunr> this map is the backend (mem_source) for the kentry_cache + < braunr> the kentry_cache is created with a special flag that tells it + memory can't be reclaimed + < braunr> when the cache needs to grow, the single map entry is extended to + cover the allocated memory + < braunr> it's similar to the way pmap_growkernel() works for kernel page + table pages + < braunr> (and is actually based on that idea) + < braunr> it's a compromise between full static and dynamic allocation + types + < braunr> the advantage is that the allocator code can be used (so there is + no need for a special allocator like in netbsd) + < braunr> the drawback is that some resources can never be returned to + their source (and under peaks, the amount of unfreeable resources could + become large, but this is unexpected) + < braunr> mcsim: for now you shouldn't waste your time with this + < braunr> 
i see the number of kernel map entries is fixed at 256 + < braunr> and i've never seen the kernel use more than around 30 entries + < mcsim> Do you think that I have to left this problem to the end? + < braunr> yes + + +# IRC, freenode, #hurd, 2011-08-11 + + < mcsim> braunr: Hello. Can you give me an advice how can I make + measurements better? + < braunr> mcsim: what kind of measurements + < mcsim> braunr: How much is your allocator better than zalloc. + < braunr> slightly :p + < braunr> that's why i never took the time to put it in gnumach + < mcsim> braunr: Just I thought that there are some rules or + recommendations of such measurements. Or I can do them any way I want? + < braunr> mcsim: i don't know + < braunr> mcsim: benchmarking is an art of its own, and i don't even know + how to use the bits of profiling code available in gnumach (if it still + works) + < antrik> mcsim: hm... are you saying you already have a running system + with slab allocator?... :-) + < braunr> mcsim: the main advantage i can see is the removal of many + arbitrary hard limits + < mcsim> antrik: yes + < antrik> \o/ + < antrik> nice work! + < braunr> :) + < braunr> the cpu layer should also help a bit, but it's hard to measure + < braunr> i guess it could be seen on the ipc path for very small buffers + < mcsim> antrik: Thanks. But I still have to 1. Change constructor for + kentry_alloc_cache. and 2. Make measurements. + < braunr> and polish the whole thing :p + < antrik> mcsim: I'm not sure this can be measured... the performance + differente in any real live usage is probably just a few percent at most + -- it's hard to construct a benchmark giving enough precision so it's not + drowned in noise... + < antrik> perhaps it conserves some memory -- but that too would be hard to + measure I fear + < braunr> yes + < braunr> there *should* be better allocation times, less fragmentation, + better accounting ... :) + < braunr> and no arbitrary limits ! 
+ < antrik> :-) + < braunr> oh, and the self debugging features can be nice too + < mcsim> But I need to prove that my work wasn't useless + < braunr> well it wasn't, but that's hard to measure + < braunr> it's easy to prove though, since there are additional features + that weren't present in the zone allocator + < mcsim> Ok. If there are some profiling features in gnumach can you give + me a link with their description? + < braunr> mcsim: sorry, no + < braunr> mcsim: you could still write the basic loop test, which counts + the number of allocations performed in a fixed time interval + < braunr> but as it doesn't match many real life patterns, it won't be very + useful + < braunr> and i'm afraid that if you consider real life patterns, you'll + see how negligeable the improvement can be compared to other operations + such as memory copies or I/O (ouch) + < mcsim> Do network drivers use this allocator? + < mcsim> ok. I'll scrape up some test and than I'll report results. + + +# IRC, freenode, #hurd, 2011-08-26 + + < mcsim> hello. Are there any analogs of copy_to_user and copy_from_user in + linux for gnumach? + < mcsim> Or how can I determine memory map if I know address? I need this + for vm_map_copyin + < guillem> mcsim: vm_map_lookup_entry? + < mcsim> guillem: but I need to transmit map to this function and it will + return an entry which contains specified address. + < mcsim> And I don't know what map have I transmit. + < mcsim> I need to transfer static array from kernel to user. What map + contains static data? + < antrik> mcsim: Mach doesn't have copy_{from,to}_user -- instead, large + chunks of data are transferred as out-of-line data in IPC messages + (i.e. using VM magic) + < mcsim> antrik: can you give me an example? I just found using + vm_map_copyin in host_zone_info. + < antrik> no idea what vm_map_copyin is to be honest... 
+ + +# IRC, freenode, #hurd, 2011-08-27 + + < braunr> mcsim: the primitives are named copyin/copyout, and they are used + for messages with inline data + < braunr> or copyinmsg/copyoutmsg + < braunr> vm_map_copyin/out should be used for chunks larger than a page + (or roughly a page) + < braunr> also, when writing to a task space, see which is better suited: + vm_map_copyout or vm_map_copy_overwrite + < mcsim> braunr: and what will be src_map for vm_map_copyin/out? + < braunr> the caller map + < braunr> which you can get with current_map() iirc + < mcsim> braunr: thank you + < braunr> be careful not to leak anything in the transferred buffers + < braunr> memset() to 0 if in doubt + < mcsim> braunr:ok + < braunr> antrik: vm_map_copyin() is roughly vm_read() + < antrik> braunr: what is it used for? + < braunr> antrik: 01:11 < antrik> mcsim: Mach doesn't have + copy_{from,to}_user -- instead, large chunks of data are transferred as + out-of-line data in IPC messages (i.e. using VM magic) + < braunr> antrik: that "VM magic" is partly implemented using vm_map_copy* + functions + < antrik> braunr: oh, you mean it doesn't actually copy data, but only page + table entries? if so, that's *not* really comparable to + copy_{from,to}_user()... + + +# IRC, freenode, #hurd, 2011-08-28 + + < braunr> antrik: the equivalent of copy_{from,to}_user are + copy{in,out}{,msg} + < braunr> antrik: but when the data size is about a page or more, it's + better not to copy, of course + < antrik> braunr: it's actually not clear at all that it's really better to + do VM magic than to copy... + + +# IRC, freenode, #hurd, 2011-08-29 + + < braunr> antrik: at least, that used to be the general idea, and with a + simpler VM i suspect it's still true + < braunr> mcsim: did you progress on your host_zone_info replacement ? 
+ < braunr> mcsim: i think you should stick to what the original + implementation did + < braunr> which is making an inline copy if caller provided enough space, + using kmem_alloc_pageable otherwise + < braunr> specify ipc_kernel_map if using kmem_alloc_pageable + < mcsim> braunr: yes. And it works. But I use kmem_alloc, not pageable. Is + it worse? + < mcsim> braunr: host_zone_info replacement is pushed to savannah + repository. + < braunr> mcsim: i'll have a look + < mcsim> braunr: I've pushed one more commit just now, which has attitude + to host_zone_info. + < braunr> mem_alloc_early_init should be renamed mem_bootstrap + < mcsim> ok + < braunr> mcsim: i don't understand your call to kmem_free + < mcsim> braunr: It shouldn't be there? + < braunr> why should it be there ? + < braunr> you're freeing what the copy object references + < braunr> it's strange that it even works + < braunr> also, you shouldn't pass infop directly as the copy object + < braunr> i guess you get a warning for that + < braunr> do what the original code does: use an intermediate copy object + and a cast + < mcsim> ok + < braunr> another error (without consequence but still, you should mind it) + < braunr> simple_lock(&mem_cache_list_lock); + < braunr> [...] 
+ < braunr> kr = kmem_alloc(ipc_kernel_map, &info, info_size); + < braunr> you can't hold simple locks while allocating memory + < braunr> read how the original implementation works around this + < mcsim> ok + < braunr> i guess host_zone_info assumes the zone list doesn't change much + while unlocked + < braunr> or that's it's rather unimportant since it's for debugging + < braunr> a strict snapshot isn't required + < braunr> list_for_each_entry(&mem_cache_list, cache, node) max_caches++; + < braunr> you should really use two separate lines for readability + < braunr> also, instead of counting each time, you could just maintain a + global counter + < braunr> mcsim: use strncpy instead of strcpy for the cache names + < braunr> not to avoid overflow but rather to clear the unused bytes at the + end of the buffer + < braunr> mcsim: about kmem_alloc vs kmem_alloc_pageable, it's a minor + issue + < braunr> you're handing off debugging data to a userspace application + < braunr> a rather dull reporting tool in most cases, which doesn't require + wired down memory + < braunr> so in order to better use available memory, pageable memory + should be used + < braunr> in the future i guess it could become a not-so-minor issue though + < mcsim> ok. I'll fix it + < braunr> mcsim: have you tried to run the kernel with MC_VERIFY always on + ? + < braunr> MEM_CF_VERIFY actually + < mcsim1> yes. + < braunr> oh + < braunr> nothing wrong + < braunr> ? + < mcsim1> it is always set + < braunr> ok + < braunr> ah, you set it in macros.h .. + < braunr> don't + < braunr> put it in mem.c if you want, or better, make it a compile-time + option + < braunr> macros.h is a tiny macro library, it shouldn't define such + unrelated options + < mcsim1> ok. + < braunr> mcsim1: did you try fault injection to make sure the checking + code actually works and how it behaves when an error occurs ? 
+ < mcsim1> I think that when I finish I'll merge files cpu.h and macros.h + with mem.c + < braunr> yes that would simplify things + < mcsim1> Yes. When I confused with types mem_buf_fill worked wrong and + panic occurred. + < braunr> very good + < braunr> have you progressed concerning the measurements you wanted to do + ? + < mcsim1> not much. + < braunr> ok + < mcsim1> I think they will be ready in a few days. + < antrik> what measurements are these? + < mcsim1> braunr: What maximal size for static data and stack in kernel? + < braunr> what do you mean ? + < braunr> kernel stacks are one page if i'm right + < braunr> static data (rodata+data+bss) are limited by grub bugs only :) + < mcsim1> braunr: probably they are present, because when I created too big + array I couldn't boot kernel + < braunr> local variable or static ? + < mcsim1> static + < braunr> how large ? + < mcsim1> 4Mb + < braunr> hm + < braunr> it's not a grub bug then + < braunr> i was able to embed as much as 32 MiB in x15 while doing this + kind of tests + < braunr> I guess it's the gnu mach boot code which only preallocates one + page for the initial kernel mapping + < braunr> one PTP (page table page) maps 4 MiB + < braunr> (x15 does this completely dynamically, unlike mach or even + current BSDs) + < mcsim1> antrik: First I want to measure time of each cache + creation/allocation/deallocation and then compile kernel. 
+ < braunr> cache creation is irrelevant + < braunr> because of the cpu pools in the new allocator, you should test at + least two different allocation patterns + < braunr> one with quick allocs/frees + < braunr> the other with large numbers of allocs then their matching frees + < braunr> (larger being at least 100) + < braunr> i'd say the cpu pool layer is the real advantage over the + previous zone allocator + < braunr> (from a performance perspective) + < mcsim1> But there is only one cpu + < braunr> it doesn't matter + < braunr> it's stil a very effective cache + < braunr> in addition to reducing contention + < braunr> compare mem_cpu_pool_pop() against mem_cache_alloc_from_slab() + < braunr> mcsim1: work is needed to polish the whole thing, but getting it + actually working is a nice achievement for someone new on the project + < braunr> i hope it helped you learn about memory allocation, virtual + memory, gnu mach and the hurd in general :) + < antrik> indeed :-) diff --git a/open_issues/hurd_101.mdwn b/open_issues/hurd_101.mdwn new file mode 100644 index 00000000..5c7031c9 --- /dev/null +++ b/open_issues/hurd_101.mdwn @@ -0,0 +1,38 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +(See Wikipedia page for the meaning of [[!wikipedia "101_(term)"]].) + +Not the first time that something like this is proposed... + +IRC, freenode, #hurd, 2011-07-25 + + [failed GNU/Hurd project] + < antrik> gnu_srs1: I wouldn't say he was on track. 
just one of the many + many people who insist on picking a hard task; realizing that indeed it's + hard; and going into hiding + < antrik> we see that happen every couple of months + < cluck> maybe we need a "hurd 101" + < cluck> getting a teacher and setting up a regularly held "class" for hurd + noobs + < Tekk_> cluck: what would that include? + < cluck> explaining core concepts, giving out "homework" (small tasks), etc + < cluck> that way "the big guys" could focus on the hard stuff and have an + army of code monkeys at their disposal to write speced stuff + < cluck> (then again this idea would heavily depend on available "teachers" + and "students", which, going by gsoc numbers, may not be all that + helpful) + < Tekk_> cluck: gsoc isn't an accurate indicator + < Tekk_> cluck: I'm not allowed to participate in gsoc but I'd join :P + < antrik> cluck: we don't need code monkeys... we need hackers + < Tekk_`> antrik: code monkeys involve into hackers + < Tekk_`> under the right conditions + < cluck> antrik: jokes aside some sort of triage system/training ground for + newcomers could be helpful diff --git a/open_issues/libpthread_dlopen.mdwn b/open_issues/libpthread_dlopen.mdwn index 0d3628ec..fb665c67 100644 --- a/open_issues/libpthread_dlopen.mdwn +++ b/open_issues/libpthread_dlopen.mdwn @@ -40,8 +40,36 @@ IRC, OFTC, #debian-hurd, 2011-07-21. The fix thus being: link the main application with -lpthread. -The same symptom appears in an odd case, for instance: +IRC, freenode, #hurd, 2011-08-17 + + < youpi> i.e. openjade apparently dlopen()s modules which use pthreads, but + openjade itself is not liked against libpthread + < youpi> which means unexpectedly loading pthreads on the fly, which is + not implemented + < youpi> (and hard to implement of course) + < youpi> gnu_srs: so simply tell openjade people to link it with -lpthread + < gnu_srs> Shuoldn't missing linking with pthread create an error when + building openjade then? 
+ < youpi> no + < youpi> because it's just a module which needs pthread + < youpi> and that module _is_ linked with -lpthread + < youpi> and dlopen() loads libpthreads too due to that + < youpi> but that's unexpected, for the libpthread initialization stuff + < youpi> (and too late to fix initlaization) + < gnu_srs> How come that other OSes build opensp w/o problems? + < youpi> because there are stubs in the libc + < gnu_srs> Sorry for the delay: What hinders stubs to be present also in + the Hurd libc parts too, to cope with this problem? + < youpi> doing it + < youpi> which is hard because you need libpthread bits inside the libc + < youpi> making it simpler would need building libpthread at the same time + as libc + +[[packaging_libpthread]] +--- + +The same symptom appears in an odd case, for instance: buildd@hurd:~$ ldd /usr/bin/openjade libthreads.so.0.3 => /lib/libthreads.so.0.3 (0x0103d000) diff --git a/open_issues/mach_tasks_memory_usage.mdwn b/open_issues/mach_tasks_memory_usage.mdwn index 88e3afb8..9abb7639 100644 --- a/open_issues/mach_tasks_memory_usage.mdwn +++ b/open_issues/mach_tasks_memory_usage.mdwn @@ -10,7 +10,7 @@ License|/fdl]]."]]"""]] [[!tag open_issue_documentation]] -IRC, #hurd, 2011-01-06. +IRC, freenode, #hurd, 2011-01-06 hm, odd... vmstat tells me that ~500 MiB of RAM are in use; but the sum of all RSS is <300 MiB... what's the rest? @@ -98,3 +98,50 @@ IRC, #hurd, 2011-01-06. 
braunr: yeah for bootstrapping issues, makes sense it may also depends on the pic/pie options used when building libraries + + +IRC, freenode, #hurd, 2011-07-24 + + < braunr> the panic is probably due to memory shortage + < braunr> so as antrik suggested, use more swap + < antrik> gg0: you could run "vmstat 1" in another terminal to watch memory + usage + < antrik> that way we will know for sure whether it's related + < braunr> antrik: it's trickier than that + < braunr> it depends if the zones used are pageable + < antrik> braunr: well, if it's a zone map exhaustion, then the swap size + won't change anything?... + < braunr> antrik: in this case no, but if the zone is pageable and the + pager (backing anonymous memory) refuses to create memory because it + estimates it's full (all swap space is reserved), it will fail to + < braunr> too + < braunr> but i don't think there are much pageable zones in the kernel + < antrik> yes, but in that case we can see the exhaustion in vmstat :-) + < braunr> many* + < braunr> i'm not sure + < braunr> reserved swap space doesn't mean it's used + < braunr> that's one of the major changes in freebsd 4 or 5 i was + mentioning + < antrik> if it's reserved, it wouldn't show up as "free", would it?... + < braunr> (btw, it's also what makes anonymous memory merging so hard) + < braunr> yes it would + < braunr> well, it could, i'm not sure + < braunr> anonymous memory is considered as a file + < braunr> one big file filled with zeroes, which is the swap partition + < braunr> when you allocate pageable anonymous memory, a part of this + "file" is reserved + < braunr> but i don't know if the reported number if the reserved + (allocated) space, or used (actually containing data) + < braunr> is* + < braunr> i also suspect wired allocations can fail because of a full swap + (because the kernel is unable to make free pages) + < braunr> in this case vmstat will show it + < antrik> what does it matter whether there is data there or not? 
if it's + reserved, it's not free. if it behaves differently, I'd consider that a + serious bug + < braunr> maybe the original developers intended to monitor its actual + usage + < braunr> antrik: i've just checked how the free count gets updated, and it + looks like it is on both seqnos_memory_object_data_initialize and + seqnos_memory_object_data_write + < braunr> antrik: so i guess reserved memory is accounted for diff --git a/open_issues/mmap_crash_etc.mdwn b/open_issues/mmap_crash_etc.mdwn new file mode 100644 index 00000000..4946a5a0 --- /dev/null +++ b/open_issues/mmap_crash_etc.mdwn @@ -0,0 +1,95 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +Several issues here: + + * [[!tag open_issue_glibc open_issue_gnumach]] Even invalid `mmap` shoudn't + crash the process. + + * [[!tag open_issue_documentation]] The memory layout example should be + documented. + + * [[!tag open_issue_gnumach]] New `vm_map` allocation strategy may be + desirable; see also [[placement_of_virtual_memory_regions]]. + + * [[!tag open_issue_glibc]] *task X deallocating an invalid port Y, most + probably a bug*. + +IRC, freenode, #hurd, 2011-08-11 + + < zyg> oh, mmap sigsegvs, strange. + < braunr> hwo do you see that ? + < zyg> braunr: I'll try to paste a minimal case + < braunr> zyg: make sure you have a sane memory setup + < braunr> 512 RAM / 1G swap seems good + < braunr> have more swap than RAM + < zyg> I have those. Still it shouldn't sigsegv. 
+ < braunr> gnumach is picky about that + < braunr> and yes, the hurd shouldn't have bugs + < zyg> braunr: ready to crash? #include #include int + main (int argc, char **argv) { mmap(0x10000, 0x8000, PROT_READ, MAP_ANON + | MAP_FIXED, -1, 0); return 0; } + < braunr> a fixed mapping at such an address is likely to fail, yes + < braunr> but a crash, hm + < zyg> why should it fail? + < braunr> because the hurd doesn't have a common text data bss heap stack + layout + < braunr> e.g. there are mappings below text, as show by vminfo : + < braunr> $ vminfo $$ + < braunr> 0[0x1000] (prot=0) + < braunr> 0x1000[0x21000] (prot=RX, max_prot=RWX, mem_obj=105) + < braunr> 0x22000[0x1000] (prot=R, max_prot=RWX, mem_obj=105) + < braunr> 0x23000[0x1000] (prot=RW, max_prot=RWX, mem_obj=105) + < braunr> 0x24000[0x1000] (prot=0, max_prot=RWX) + < braunr> 0x25000[0xfff000] (prot=RWX, mem_obj=106) + < braunr> 0x1024000[0x1000] (prot=RWX, mem_obj=107) + < braunr> 0x1025000[0x1000] (prot=RW, max_prot=RWX, mem_obj=108) + < braunr> 0x1026000[0x1000] (prot=RW, max_prot=RWX, mem_obj=108, + offs=0x1000) + < braunr> 0x1027000[0x1000] (prot=RW, max_prot=RWX, mem_obj=109) + < braunr> 0x1028000[0x2000] (prot=RW, max_prot=RWX, mem_obj=110, + offs=0x1000) + < braunr> 0x102a000[0x1000] (prot=RW, max_prot=RWX, mem_obj=111) + < braunr> (sorry for the long paste) + < zyg> oh.. my mmap falls into an occupied range? + < braunr> seems so + < zyg> thanks, that was really useful. + < braunr> MAP_FIXED isn't portable, this is clearly stated in most man + pages + < zyg> yes, implementation specific it says + < braunr> well the behaviour isn't specific, it's well defined, but the + memory layout isn't + < braunr> i personally think vm_map() should be slightly changed to include + a new flag for top-down allocations + < braunr> so that our stack and libraries are at high addresses, below the + kernel + < braunr> zyg: what kind of error do you get ? 
i don't get sigsegv + < zyg> I get both sigsegv and sigill depending on addr + < braunr> ok + < braunr> i get sigill with your example + < braunr> the error is the same (wrong memory access) but the behaviour + changes because of the special memory configuration + < zyg> yes.. I guess the usecase is too uncommon. Else mmap would have an + guard + < braunr> some accesses cause invalid page faults (which are sent as + segmentation faults) while other cause general protection faults (which + are sent as illegal instructions) + < braunr> (this is quite weird since the GP fault is likely because the + access targets something out of the data or code segment eh) + < zyg> braunr: that's very os-specific. Do you mean hurd behaves that way? + < braunr> gnumach + < braunr> on i386 + < braunr> the segmant configuration isn't completely flat + < braunr> segment* + < braunr> hm nice + < braunr> your small program triggers the "task X deallocating an invalid + port Y, most probably a bug." message + < zyg> where do you see that? + < braunr> on the mach console diff --git a/open_issues/multiprocessing.mdwn b/open_issues/multiprocessing.mdwn index 224c0826..562ccd83 100644 --- a/open_issues/multiprocessing.mdwn +++ b/open_issues/multiprocessing.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2011 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -8,7 +8,7 @@ Sections, no Front-Cover Texts, and no Back-Cover Texts. 
A copy of the license is included in the section entitled [[GNU Free Documentation License|/fdl]]."]]"""]] -[[!tag open_issue_hurd]] +[[!tag open_issue_documentation open_issue_hurd]] We would expect that fine-grained, compartmentalized systems, that is, microkernel-based multi-server systems in particular, would be ideal candidates @@ -16,7 +16,7 @@ for applying multiprocessing. That is, however, only true from a first and inexperienced point of view: there are many difficulties. -IRC, #hurd, August / September 2010 +IRC, freenode, #hurd, August / September 2010 silver_hook: because multi-server systems depend on inter-process communication, and inter-process communication is many times more @@ -31,6 +31,37 @@ IRC, #hurd, August / September 2010 serious research challenges +IRC, freenode, #hurd, 2011-07-26 + + < braunr> 12:03 < CTKArcher> and does the hurd take more advantages in a + multicore architecture than linux ? + < braunr> CTKArcher: short answer: no + < CTKArcher> it's easier to imagine one server pro core than the linux + kernel divided to be executed on multiple cores + < braunr> CTKArcher: this approach is less efficient + < braunr> CTKArcher: threads carry state, both explicit and implicit (like + cache data) + < braunr> CTKArcher: switching to another core means resetting and + refetching this state + < braunr> it's expensive and there is no gain obtained by doing this + < braunr> thread migration (having a thread from a client also run in + servers when making synchronous RPC, even handling its own page faults) + was implemented in mach4 and is imo a very good thing we should have + < braunr> CTKArcher: and concerning linux, it's actually very scalable + < braunr> it's already like if all client threads run in servers (the + kernel is the servers there) + < braunr> rcu is used a lot + < braunr> thread migration already takes into account smt, cores, and numa + < braunr> it's hard to do something better + < braunr> (here, thread migration means being 
dispatched on another cpu) + < braunr> some systems like dragonflybsd go as far as to pin threads on one + processor for their entire lifetime + < braunr> in order to have rcu-like locking almost everywhere + < braunr> (you could argue it's less efficient since in the worst case + everything runs on the same cpu, but it's very unlikely, and in practice + most patterns are well balanced) + + debian-hurd list On Thu, Jan 02, 2003 at 05:40:00PM -0800, Thomas Bushnell, BSG wrote: diff --git a/open_issues/packaging_libpthread.mdwn b/open_issues/packaging_libpthread.mdwn index 7594ae76..fa3d4312 100644 --- a/open_issues/packaging_libpthread.mdwn +++ b/open_issues/packaging_libpthread.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2011 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -15,6 +15,9 @@ IRC, #hurd, 2010-07-31 My idea was to have a separate libpthread package. What do you think about that? in the long term, that can't work with glibc because of the thread stub stuff + +[[libpthread_dlopen]], for example. + it's not really possible to keep synchronized because you have to decide which package you unpack first (when upgrading) diff --git a/open_issues/performance.mdwn b/open_issues/performance.mdwn index eb9f3f8a..54f3ce39 100644 --- a/open_issues/performance.mdwn +++ b/open_issues/performance.mdwn @@ -26,3 +26,7 @@ severe performance degradation. For example, in this [[`fork` system call|/glibc/fork]]'s case. [[Unit_testing]] can be used for tracking performance regressions. 
+ +--- + + * [[Degradation]] diff --git a/open_issues/performance/degradation.mdwn b/open_issues/performance/degradation.mdwn new file mode 100644 index 00000000..5db82e31 --- /dev/null +++ b/open_issues/performance/degradation.mdwn @@ -0,0 +1,28 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!meta title="Degradation of GNU/Hurd ``system performance''"]] + +Email, *id:"87mxg2ahh8.fsf@kepler.schwinge.homeip.net"* (bug-hurd, 2011-07-25, +Thomas Schwinge) + +> Building a certain GCC configuration on a freshly booted system: 11 h. +> Remove build tree, build it again (2nd): 12 h 50 min. Huh. Remove build +> tree, reboot, build it again (1st): back to 11 h. Remove build tree, build +> it again (2nd): 12 h 40 min. Remove build tree, build it again (3rd): 15 h. + +IRC, freenode, #hurd, 2011-07-23 + + < antrik> tschwinge: yes, the system definitely gets slower with + time. 
after running for a couple of weeks, it needs at least twice as + long to open a new shell for example + < antrik> I don't know whether this is only related to swap usage, or there + are some serious fragmentation issues + < braunr> antrik: both could be induced by fragmentation diff --git a/open_issues/performance/io_system/binutils_ld_64ksec.mdwn b/open_issues/performance/io_system/binutils_ld_64ksec.mdwn index 79c2300f..359d5fee 100644 --- a/open_issues/performance/io_system/binutils_ld_64ksec.mdwn +++ b/open_issues/performance/io_system/binutils_ld_64ksec.mdwn @@ -33,3 +33,18 @@ the testee shows that (primarily) an ever-repeating series of `io_seek` and `io_read` is being processed. Running the testee on GNU/Linux with strace shows the equivalent thing (`_llseek`, `read`) -- but Linux' I/O system isn't as slow as the Hurd's. + +--- + +IRC, freenode, #hurd, 2011-09-01: + + hum, f951 does myriads of 71->io_seek_request (32768 0) = 0 32768 + no wonder it's slow + unfortunately that's also what it does on linux, the system call is + just less costly + apparently gfortran calls io_seek for, like, every token of the + sourced file + (fgetpos actually, but that's the same) + and it is indeed about 10 times slower under Xen for some reason + +[[!tag open_issue_xen]] diff --git a/open_issues/performance/microkernel_multi-server.mdwn b/open_issues/performance/microkernel_multi-server.mdwn new file mode 100644 index 00000000..111d2b88 --- /dev/null +++ b/open_issues/performance/microkernel_multi-server.mdwn @@ -0,0 +1,47 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. 
A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_documentation]] + +Performance issues due to the microkernel/multi-server system architecture? + +IRC, freenode, #hurd, 2011-07-26 + + < CTKArcher> I read that, because of its microkernel+servers design, the + hurd was slower than a monolithic kernel, is that confirmed ? + < youpi> the hurd is currently slower than current monolithic kernels, but + it's not due to the microkernel + servers design + < youpi> the microkernel+servers design makes the system call path longer + < youpi> but you're bound by disk and network speed + < youpi> so the extra overhead will not hurt so much + < youpi> except dumb applications keeping doing system calls all the time + of course, but they are usually considered bogus + < braunr> there may be some patterns (like applications using pipes + extensively, e.g. git-svn) which may suffer from the design, but still in + an acceptable range + < CTKArcher> so, you are saying that disk and network are more slowing the + system than the longer system call path and because of that, it wont + really matter ? 
+ < youpi> braunr: they should sitll be fixed because they'll suffer (even if + less) on monolithic kernels + < youpi> CTKArcher: yes + < braunr> yes + < CTKArcher> mmh + < youpi> CTKArcher: you might want to listen to AST's talk at fosdem 10 + iirc, about minix + < youpi> they even go as far as using an IPC for each low-level in/out + < youpi> for security + < braunr> this has been expected for a long time + < braunr> which is what motivated research in microkernels + < CTKArcher> I've already downloaded the video :) + < youpi> and it has been more and more true with faster and faster cpus + < braunr> but in 95, processors weren't that fast compared to other + components as they are now + < youpi> while disk/mem haven't evovled so fast diff --git a/open_issues/proc_server_proc_exception_raise.mdwn b/open_issues/proc_server_proc_exception_raise.mdwn new file mode 100644 index 00000000..1d0e92a3 --- /dev/null +++ b/open_issues/proc_server_proc_exception_raise.mdwn @@ -0,0 +1,37 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_hurd]] + +IRC, freenode, #hurd, 2011-08-11 + + < youpi> in which error cases a reply port will actually have been consumed + by mach_msg ? + < youpi> it seems at least MACH_SEND_NOTIFY_IN_PROGRESS do? 
+ < braunr> + http://www.gnu.org/software/hurd/gnumach-doc/Message-Send.html#Message-Send + < braunr> "These return codes imply that the message was returned to the + caller with a pseudo-receive operation: " + < braunr> isn't it what you're looking for ? + < youpi> well, it's hard to tell from the name + < youpi> I don't know what "pseudo-receiv operation" means + < braunr> it's described below + < youpi> ew + < braunr> it looks close enough to a normal receive to assume it consumes + the reply port + < youpi> so it's even more complex than what I thought + < youpi> well, no, it returns the right + < youpi> actually the error I'm getting is MACH_RCV_INVALID_NAME + < youpi> which I guess means the sending part succeeded + < youpi> the case at stake is proc/mgt.c: S_proc_exception_raise() + < youpi> when the proc_exception_raise() forward fails + < youpi> currently we always return 0, but if proc_exception_raise() + actually managed to send the message, the reply port was consumed and + MIG_NO_REPLY should be returned instead diff --git a/open_issues/resource_management_problems.mdwn b/open_issues/resource_management_problems.mdwn index 760c7d66..1558bebb 100644 --- a/open_issues/resource_management_problems.mdwn +++ b/open_issues/resource_management_problems.mdwn @@ -61,7 +61,22 @@ This is, of course, non-trivial to implement, and also requires changing the SPLICE_F_GIFT flag](http://www.kernel.org/doc/man-pages/online/pages/man2/vmsplice.2.html#DESCRIPTION).) +IRC, freenode, #hurd, 2011-07-31 + + < braunr> one of the biggest problems on the hurd is that, when a client + makes a call, kernel (and other) resources are allocated on behalf of the + server performaing the requested action + < braunr> performing* + < braunr> this makes implementing scheduling and limits difficult + < CTKArcher> And could changing the kernel change anything to that ? 
+ < braunr> yes but you'd probably need to change its interface as well + < braunr> iirc, the critique describes resource containers + < braunr> but no work has been done on the current hurd (hence the hurdng + attempts) + # Further Examples + * [[IO_accounting]] + * [[configure max command line length]] diff --git a/open_issues/resource_management_problems/io_accounting.mdwn b/open_issues/resource_management_problems/io_accounting.mdwn new file mode 100644 index 00000000..113b965a --- /dev/null +++ b/open_issues/resource_management_problems/io_accounting.mdwn @@ -0,0 +1,49 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +IRC, freenode, #hurd, 2011-07-22 + + an interesting question i've had in mind for a few weeks now is + I/O accounting + what *is* I/O on a microkernel based system ? + can any cross address space transfer be classified as I/O ? + +IRC, freenode, #hurd, 2011-07-29 + + < braunr> how does the hurd account I/O ? + < youpi> I don't think it does + < youpi> not an easy task, actually + < youpi> since gnumach has no idea about it + < braunr> yes + < braunr> another centralization issue + < braunr> does network access count as I/O on linux ? + < youpi> no + < braunr> not even nfs ? + < youpi> else you'd get 100% for servers :) + < braunr> right + < youpi> nfs goes through vfs first + < braunr> i'll rephrase my question + < youpi> I'd need to check but I believe it can check nfs + < braunr> does I/O accounting occur at the vfs level or block layer ? 
+ < youpi> I don't know, but I beleive vfs + < youpi> (at least that's how I'd do it) + < braunr> i don't have any more nfs box to test that :/ + < braunr> personally i'd do it at the block layer :) + < youpi> well, both + < youpi> so e2fsck can show up too + < braunr> yes + < youpi> it's just a matter of ref counting + < youpi> apparently nfs doesn't account + < youpi> find . -printf "" doesn't show up in waitio + < braunr> good + < youpi> well, depends on the point of view + < youpi> as a user, you'd like to know whether your processes are stuck on + i/o (be it disk or net) + < braunr> this implies clearly defining what io is diff --git a/open_issues/sa_siginfo_sa_sigaction.mdwn b/open_issues/sa_siginfo_sa_sigaction.mdwn index d6199b6a..3b8edff7 100644 --- a/open_issues/sa_siginfo_sa_sigaction.mdwn +++ b/open_issues/sa_siginfo_sa_sigaction.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2011 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -45,3 +45,50 @@ IRC, #hurd, August / September 2010: (i.e. replace with 0 in your example) ok when SA_SIGINFO becomes available, it'll just be used + +IRC, freenode, #hurd, 2011-08-20: + + < youpi> erf, tcpwrappers will need si_pid + < jkoenig> I could implement it not too far away in the future, we just + need a version of msg_sig_post() with a siginfo argument or something. + < youpi> I can also see a lot of packages using SA_SIGINFO for no reason... + < youpi> (probably copy/pasty code) + < youpi> sa.sa_flags = SA_SIGINFO; + < youpi> sa.sa_handler = parse_config; + < youpi> void parse_config(int) + < youpi> yay + < youpi> if(siginf->si_signo == SIGXCPU) + < youpi> fprintf(stderr, "Exceeded CPU usage.\n"); + < youpi> ... 
+ < youpi> jkoenig: actually most package don't actually use the SA_SIGINFO + they request... + < youpi> jkoenig: si_pid should get us almost all actually used coverage + < youpi> I've seen only one example using si_errno + < jkoenig> ok + < youpi> oh, it's actually supported by your patch + < youpi> (errno) + < jkoenig> but I guess since implementing si_pid will require a new RPC, we + might as well plan for the rest + < youpi> jkoenig: indeed + < jkoenig> youpi, hmm I doubt it's properly filled in in all circumstances? + < youpi> ok, well, we'll see + < pinotree> jkoenig: if it can be of help, boost::unit_test queries various + fields of siginfo_t depending on the signal + < pinotree> jkoenig: also, pulseaudio uses siginfo_t for remapping faulting + memory on SIGBUS + < jkoenig> pinotree, oh ok good to know + < pinotree> *faulty + < youpi> jkoenig: well, I guess you had checked that the si_addr field is + correct in a few simple testcase :) + < jkoenig> hmm I think so, yes + < jkoenig> I ran like, "* (char *) 0x12345678;" or something IIRC + < youpi> ok + < jkoenig> I seem to remember mach generated SIGBUS instead of SIGSEGV + depending on the upper bit, or something (I can't quite remember) + < jkoenig> but when sigsegv was generated si_addr was right. 
+ < pinotree> jkoenig: (see boost/test/impl/execution_monitor.ipp in boost + sources) + < pinotree> maybe you can try the unit tests for boost::unit_tests, if any + :) + < pinotree> (while src/pulsecore/memtrap.c in PA) + * pinotree stops doing MrObvious™ diff --git a/open_issues/sbcl.mdwn b/open_issues/sbcl.mdwn new file mode 100644 index 00000000..4bbf92ef --- /dev/null +++ b/open_issues/sbcl.mdwn @@ -0,0 +1,31 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_porting]] + +IRC, freenode, #hurd, 2011-08-12 + + < zyg> did the segment registers had any purpose? I see fs is set equal to + others, but on linux fs is 0 (atleast on this x86 box). + < braunr> zyg: it can be used by special applications like wine, yes + < zyg> braunr: thanks.. I'm reading up on linux actually. It seems gs can + be used for TLS, fs in syscall to pass userspace. + < braunr> zyg: why are you interested in that ? + < zyg> a native compiler under linux places assumptions on fs register. So + I'm trying to find out what it should do under gnumach/hurd. + < braunr> what compiler ? + < zyg> braunr: it's sbcl + < braunr> ok + < youpi> zyg: the same, basically + < zyg> ok.. looking at the code, I've remarked where it sets up FS, because + /usr/include/asm/ldt.h:struct user_desc is missing. I must search for the + equiv. 
+ < youpi> zyg: mach/i386/mach_i386.h + < youpi> the descriptor structure diff --git a/open_issues/sendmsg_scm_creds.mdwn b/open_issues/sendmsg_scm_creds.mdwn index 2deec7e8..c613e21c 100644 --- a/open_issues/sendmsg_scm_creds.mdwn +++ b/open_issues/sendmsg_scm_creds.mdwn @@ -90,6 +90,10 @@ IRC, unknown channel, unknown date. yep ok, good :) +/!\ IRC, freenode, #hurd, 2011-08-11 + + < pinotree> (but that patch is lame) + --- See also [[pflocal_socket_credentials_for_local_sockets]] and [[pflocal_reauth]]. diff --git a/open_issues/syslog.mdwn b/open_issues/syslog.mdwn index 778933a7..5fec38b1 100644 --- a/open_issues/syslog.mdwn +++ b/open_issues/syslog.mdwn @@ -1,7 +1,45 @@ IRC, unknwon channel, unknown date. - scolobb: In wiki edit 60accafa79f645ae61b578403f7fc0c11914b725 I see that you intend(ed) to use syslog for logging debug messages. I thought I'd point you to http://lists.gnu.org/archive/html/bug-hurd/2007-02/msg00042.html -- no idea if that's still an issue or what went wrong at that time. Perhaps you can have a look? - tschwinge: Thanks for information! Currently I'm logging some debug messages to a simple file, but I'll now check whether the issue you've pointed out is still present. - tschwinge: I am getting absolutely abnormal results: when I call syslog() from a simple C program for the first time, the message goes to the system log. However, any further calls to syslog() do just nothing... I am able to send something to syslog only after reboot (it doesn't help if I restart syslogd). + scolobb: In wiki edit 60accafa79f645ae61b578403f7fc0c11914b725 + I see that you intend(ed) to use syslog for logging debug messages. I + thought I'd point you to + http://lists.gnu.org/archive/html/bug-hurd/2007-02/msg00042.html -- no + idea if that's still an issue or what went wrong at that time. Perhaps + you can have a look? + tschwinge: Thanks for information! 
Currently I'm logging some + debug messages to a simple file, but I'll now check whether the issue + you've pointed out is still present. + tschwinge: I am getting absolutely abnormal results: when I call + syslog() from a simple C program for the first time, the message goes to + the system log. However, any further calls to syslog() do just + nothing... I am able to send something to syslog only after reboot (it + doesn't help if I restart syslogd). +IRC, freenode, #hurd, 2011-08-08 + + < pinotree> wow, `logger` + a simple C udp server can cause havoc + < pinotree> youpi: ever seen something like + http://paste.debian.net/hidden/72cf4b77/ ? + < pinotree> and then also other servers (like pflocal, pfinet, few more) + start becoming crazy (using 100% cpu) + < youpi> nope + < pinotree> iirc in one of the few tries i got the message "Resource lost." + from the closed ssh connection + < pinotree> i was trying to see why syslog doesn't work, but this basically + surprised me... + < pinotree> oh, i found an apparently working syslog daemon + < pinotree> dsyslog + < gg0> have you tried syslog-ng? IIRC it writes in /var/log/messages by + default. + < pinotree> yeah, it seems to stop receiving messages are few + < pinotree> gg0: are you using syslog-ng? + < gg0> pinotree: I should fire hurd vm up. I seem I kept dirty-patched + busybox syslog, I don't even know if it works, at least it starts + http://bugs.debian.org/636162 + < pinotree> maintainer said "not really" + < gg0> well, if all other syslogs use shm and sems, they won't work too, + right? + < youpi> shm should work with the latest libc + < youpi> what won't is sysv sem + < youpi> (i.e. 
semget) diff --git a/open_issues/tty_activitiy_vs_disk_io.mdwn b/open_issues/tty_activitiy_vs_disk_io.mdwn new file mode 100644 index 00000000..26382d56 --- /dev/null +++ b/open_issues/tty_activitiy_vs_disk_io.mdwn @@ -0,0 +1,81 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_hurd]] + +IRC, freenode, #hurd, 2011-07-25 + + < youpi> Mmm, typing something on the mach console triggers a write on the + disk + < youpi> because the /dev/console node gets updated + < youpi> I don't really see why + < youpi> (yes, just typing at the bash prompt, not even running something) + < youpi> typing during the sleep command (i.e. mere tty echo) doesn't + trigger it, however + < youpi> running bash's echo does trigger it + < braunr> during sleep, the glibc stream functions handle I/O, while with + bash, its readline takes care of it, right ? + < youpi> /bin/echo too + < youpi> during sleep it's the tty process which handles I/O + < braunr> the write may be due to a write time update on the inode + < braunr> modification* time + < youpi> probably yes, but how so? + < youpi> ext2fs is only supposed to pass the thing to the console + translator + < braunr> not sure + < youpi> actually, ext2fs even isn't supposed to come into play when it's + about typing at the bash prompt + < youpi> once it's opened, isn't the port for /dev/console supposed to be + directly to the translator there? 
+ < braunr> i think so + < youpi> (s/tty/term/ in what I said) + < braunr> well, it's certain + < youpi> so I don't see how ext2fs can be triggered to write an atime or + mtime + < braunr> what does rpctrace say ? + < youpi> io_read_request and io_write_request + < youpi> braunr: it doesn't happen at the login prompt + < youpi> interestingly, atime is always 3-4 secs earlier than ctime & mtime + < youpi> doesn't happen with dash + < braunr> we should implement relatime and experiment with it + < braunr> it shouldn't be hard + < youpi> well, there's noatime already + < youpi> but my point is that this update shouldn't happen + < youpi> and I believe it's the source of the i_file_acl e2fsck warning + < braunr> i wasn't saying that concerning this problem, it was just a + separate idea (noatime is more problematic than relatime) + < braunr> and i agree, it shouldn't happen :) + < youpi> ok, it's set_node_times which gets called + +IRC, freenode, #hurd, 2011-07-27 + + < antrik> BTW, I'm not sure it's still relevant; but the reason accessing + translators such as the console modifies the underlying node is that most + stat information is generally passed through + < antrik> (in some cases it might be unintentional though, simply using the + default implementation from trivfs carelessly...) + < youpi> I know + < youpi> I've seen that in the code + < antrik> OK + < youpi> it is still relevant: I still find it useless to write it on the + disk + < youpi> though w uses it to show idle time over reboot + < braunr> is it useful to keep the information across reboots ? + < youpi> for some value of "useful" for w + < braunr> i wonder what would break if this was entierly kept in memory + < youpi> nothing, probably + < youpi> note that it doesn't overload ext2fs so much, it just adds a write + every ~5s + < youpi> (at worse, i.e. when keeping showing text, for instance) + < braunr> indeed, the behaviour seems the same on linux + < antrik> ah... 
that explains why the disk doesn't spin down while IRC is + active... always wondered about that :-) + < youpi> that's not very power-saving, yes + < youpi> well, we might want to put /dev on ram someday diff --git a/open_issues/user-space_device_drivers.mdwn b/open_issues/user-space_device_drivers.mdwn index b8061f71..e929f2bf 100644 --- a/open_issues/user-space_device_drivers.mdwn +++ b/open_issues/user-space_device_drivers.mdwn @@ -33,6 +33,16 @@ Also see [[device drivers and IO systems]]. to IRQs. However, at least in GNU Mach, that code (`kern/eventcount.c`) doesn't seem functional at all and isn't integrated properly in the kernel. + * IRC, freenode, #hurd, 2011-07-29 + + < antrik> regarding performance of userspace drivers, there is one + thing that really adds considerable overhead: interrupt + handling. whether this is relevant very much depends on the hardware + in question. when sending many small packets over gigabit ethernet, + it might be noticable; in most other cases it's irrelevant + < youpi> some cards support interrupt coalescin + < youpi> could be supported by DDE too + ## DMA * Security considerations. @@ -52,6 +62,32 @@ Also see [[device drivers and IO systems]]. * [[GNU Mach|microkernel/mach/gnumach]] is said to have a high overhead when doing RPC calls. +## System Boot + +IRC, freenode, #hurd, 2011-07-27 + + < braunr> btw, was there any formulation of the modifications required to + have disk drivers in userspace ? + < braunr> (which would obviously need something like + initrd/initramfs/whatever and may also need the root file system not to + be the first task started) + < braunr> hm actually, we may not need initrd + < braunr> the boot loader could just load more modules + < antrik> braunr: I have described all that in my thesis report... 
in + German :-( + < braunr> and the boot scripts could be adjusted to pass around the right + ports + < Tekk_> braunr: yeah, we could probably load a module that kciks us into + userspace and starts the disk driver + < braunr> modules are actualy userspace executables + < Tekk_> ah + < Tekk_> so what's the issue? + < Tekk_> oh! I'm thinking the ext2fs server, which is already in userspce + < braunr> change the file systems to tell them which underlying disk driver + to use + < Tekk_> mhm + < braunr> s/disk/storage/ + # Plan diff --git a/open_issues/wine.mdwn b/open_issues/wine.mdwn index 85d35c9c..65e6c584 100644 --- a/open_issues/wine.mdwn +++ b/open_issues/wine.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2011 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -19,3 +19,51 @@ requirements Wine has: only libc / POSIX / etc., or if there are [[Samuel|samuelthibault]] suspects that *there's some need for LDT table allocation. There is kernel support for this,* however. + + +IRC, freenode, #hurd, 2011-08-11 + + < arethusa> I've been trying to make Wine work inside a Debian GNU/Hurd VM, + and to that end, I've successfully compiled the latest sources from Git + after installing the libc (devel) packages from experimental and + personally patching Wine with http://pastebin.com/rg6dx09G + +[[rg6dx09G.patch]] + + < arethusa> my question is, when trying to launch Wine, I'm seeing "wine + client error:0: sendmsg: (os/kern) invalid address" from the client side, + whereas the wineserver seems to be starting and running correctly, how + could I debug this issue further? 
using rpctrace doesn't seem to help, as + the trace just hangs when run on the Wine loader instead of yielding + insight + < kilobug> arethusa: isn't there a wine debuguer that can start a gdb when + wine encounters an error or something like that ? + < arethusa> it's too early for that + < kilobug> or least give you a full traceback of the wine code where the + error occur ? + < arethusa> the error is happening during initial connect to the + wineserver, in dlls/ntdll/server.c + < arethusa> but that doesn't help me figure out why sendmsg would error out + in this way + < arethusa> + http://source.winehq.org/git/wine.git/blob/HEAD:/dlls/ntdll/server.c#l361 + < azeem_> arethusa: probably some of the msghdr entries are not supported + by the Hurd's glib + < azeem_> c + < pinotree> haha, socket credentials, which we don't support yet + < azeem_> yep + < pinotree> youpi: ↑ another case ;) + < azeem_> arethusa: just implement those and it should work + < kilobug> in pflocal ? or glibc ? + < pinotree> pflocal + < arethusa> azeem_: hmm, okay, thanks + < pinotree> arethusa: their lack is a known issue, and makes things like + dbus and gamin not work + < arethusa> it's + https://www.gnu.org/software/hurd/open_issues/sendmsg_scm_creds.html and + related links I assume? 
+ +[[sendmsg_scm_creds]] + + < youpi> yes + < pinotree> (but that patch is lame) diff --git a/open_issues/wine/rg6dx09G.patch b/open_issues/wine/rg6dx09G.patch new file mode 100644 index 00000000..510ff23f --- /dev/null +++ b/open_issues/wine/rg6dx09G.patch @@ -0,0 +1,116 @@ +diff --git a/dlls/ntdll/directory.c b/dlls/ntdll/directory.c +index 42b3639..7484608 100644 +--- a/dlls/ntdll/directory.c ++++ b/dlls/ntdll/directory.c +@@ -3145,14 +3145,14 @@ static void WINAPI read_changes_user_apc( void *arg, IO_STATUS_BLOCK *io, ULONG + static NTSTATUS read_changes_apc( void *user, PIO_STATUS_BLOCK iosb, NTSTATUS status, void **apc ) + { + struct read_changes_info *info = user; +- char data[PATH_MAX]; ++ char data[4096]; + NTSTATUS ret; + int size; + + SERVER_START_REQ( read_change ) + { + req->handle = wine_server_obj_handle( info->FileHandle ); +- wine_server_set_reply( req, data, PATH_MAX ); ++ wine_server_set_reply( req, data, 4096 ); + ret = wine_server_call( req ); + size = wine_server_reply_size( reply ); + } +diff --git a/dlls/ntdll/signal_i386.c b/dlls/ntdll/signal_i386.c +index 6c8e8e2..e949227 100644 +--- a/dlls/ntdll/signal_i386.c ++++ b/dlls/ntdll/signal_i386.c +@@ -180,6 +180,36 @@ __ASM_GLOBAL_FUNC(vm86_enter, + + #endif /* linux */ + ++#ifdef __GNU__ ++ ++typedef ucontext_t SIGCONTEXT; ++ ++#define EAX_sig(context) ((context)->uc_mcontext.gregs[REG_EAX]) ++#define EBX_sig(context) ((context)->uc_mcontext.gregs[REG_EBX]) ++#define ECX_sig(context) ((context)->uc_mcontext.gregs[REG_ECX]) ++#define EDX_sig(context) ((context)->uc_mcontext.gregs[REG_EDX]) ++#define ESI_sig(context) ((context)->uc_mcontext.gregs[REG_ESI]) ++#define EDI_sig(context) ((context)->uc_mcontext.gregs[REG_EDI]) ++#define EBP_sig(context) ((context)->uc_mcontext.gregs[REG_EBP]) ++#define ESP_sig(context) ((context)->uc_mcontext.gregs[REG_ESP]) ++ ++#define CS_sig(context) ((context)->uc_mcontext.gregs[REG_CS]) ++#define DS_sig(context) ((context)->uc_mcontext.gregs[REG_DS]) ++#define 
ES_sig(context) ((context)->uc_mcontext.gregs[REG_ES]) ++#define SS_sig(context) ((context)->uc_mcontext.gregs[REG_SS]) ++#define FS_sig(context) ((context)->uc_mcontext.gregs[REG_FS]) ++#define GS_sig(context) ((context)->uc_mcontext.gregs[REG_GS]) ++ ++#define EFL_sig(context) ((context)->uc_mcontext.gregs[REG_EFL]) ++#define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP]) ++#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) ++#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) ++ ++#define FPU_sig(context) ((FLOATING_SAVE_AREA *)&(context)->uc_mcontext.fpregs.fp_reg_set.fpchip_state) ++#define FPUX_sig(context) NULL ++ ++#endif /* __GNU__ */ ++ + #ifdef BSDI + + #include +diff --git a/dlls/shell32/shfldr_unixfs.c b/dlls/shell32/shfldr_unixfs.c +index 9649df8..cdd1798 100644 +--- a/dlls/shell32/shfldr_unixfs.c ++++ b/dlls/shell32/shfldr_unixfs.c +@@ -369,7 +369,7 @@ static inline BOOL UNIXFS_is_pidl_of_type(LPCITEMIDLIST pIDL, SHCONTF fFilter) { + static BOOL UNIXFS_get_unix_path(LPCWSTR pszDosPath, char *pszCanonicalPath) + { + char *pPathTail, *pElement, *pCanonicalTail, szPath[FILENAME_MAX], *pszUnixPath, has_failed = 0, mb_path[FILENAME_MAX]; +- WCHAR wszDrive[] = { '?', ':', '\\', 0 }, dospath[PATH_MAX], *dospath_end; ++ WCHAR wszDrive[] = { '?', ':', '\\', 0 }, dospath[MAX_PATH], *dospath_end; + int cDriveSymlinkLen; + void *redir; + +diff --git a/dlls/winex11.drv/xrender.c b/dlls/winex11.drv/xrender.c +index ad8e08b..a8d6329 100644 +--- a/dlls/winex11.drv/xrender.c ++++ b/dlls/winex11.drv/xrender.c +@@ -2440,8 +2440,8 @@ void X11DRV_XRender_UpdateDrawable(X11DRV_PDEVICE *physDev) + return; + } + +-BOOL XRender_AlphaBlend( X11DRV_PDEVICE *devDst, X11DRV_PDEVICE *devSrc, +- struct bitblt_coords *dst, struct bitblt_coords *src, BLENDFUNCTION blendfn ) ++BOOL XRender_AlphaBlend( X11DRV_PDEVICE *devDst, struct bitblt_coords *dst, ++ X11DRV_PDEVICE *devSrc, struct bitblt_coords *src, BLENDFUNCTION blendfn ) + { + 
FIXME("not supported - XRENDER headers were missing at compile time\n"); + return FALSE; +diff --git a/libs/wine/ldt.c b/libs/wine/ldt.c +index 3098061..b3fee13 100644 +--- a/libs/wine/ldt.c ++++ b/libs/wine/ldt.c +@@ -96,6 +96,11 @@ static inline int set_thread_area( struct modify_ldt_s *ptr ) + #include + #endif + ++#ifdef __GNU__ ++#include ++#include ++#endif ++ + /* local copy of the LDT */ + #ifdef __APPLE__ + struct __wine_ldt_copy wine_ldt_copy = { { 0, 0, 0 } }; +@@ -203,6 +208,9 @@ static int internal_set_entry( unsigned short sel, const LDT_ENTRY *entry ) + #elif defined(__APPLE__) + if ((ret = i386_set_ldt(index, (union ldt_entry *)entry, 1)) < 0) + perror("i386_set_ldt"); ++#elif defined(__GNU__) ++ if ((ret = i386_set_ldt(mach_thread_self(), sel, (descriptor_list_t)entry, 1)) != KERN_SUCCESS) ++ perror("i386_set_ldt"); + #else + fprintf( stderr, "No LDT support on this platform\n" ); + exit(1); \ No newline at end of file -- cgit v1.2.3 From 278f76de415c83bd06146b2f25a002cf0411d025 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Tue, 6 Sep 2011 16:02:51 +0200 Subject: IRC. 
--- microkernel/mach/memory_object/discussion.mdwn | 43 ++- open_issues/clock_gettime.mdwn | 30 ++ open_issues/default_pager.mdwn | 28 ++ open_issues/gnumach_memory_management.mdwn | 92 ++++++ open_issues/mach_migrating_threads.mdwn | 15 + open_issues/performance.mdwn | 8 + open_issues/performance/degradation.mdwn | 14 +- open_issues/performance/ipc_virtual_copy.mdwn | 358 +++++++++++++++++++++ open_issues/time.mdwn | 16 +- .../translators_set_up_by_untrusted_users.mdwn | 43 +++ 10 files changed, 644 insertions(+), 3 deletions(-) create mode 100644 open_issues/default_pager.mdwn create mode 100644 open_issues/mach_migrating_threads.mdwn create mode 100644 open_issues/performance/ipc_virtual_copy.mdwn (limited to 'open_issues/performance') diff --git a/microkernel/mach/memory_object/discussion.mdwn b/microkernel/mach/memory_object/discussion.mdwn index a006429b..c874b255 100644 --- a/microkernel/mach/memory_object/discussion.mdwn +++ b/microkernel/mach/memory_object/discussion.mdwn @@ -10,7 +10,7 @@ License|/fdl]]."]]"""]] [[!tag open_issue_documentation open_issue_gnumach]] -IRC, freenode, #hurd, 2011-08-05 +IRC, freenode, #hurd, 2011-08-05: < neal> braunr: For instance, memory objects are great as they allow you to specify the mapping policy in user space. 
@@ -22,3 +22,44 @@ IRC, freenode, #hurd, 2011-08-05 < neal> I'm not sure what you mean by page cache lru appoximateion < braunr> the kernel eviction policy :) < neal> that's an implementation detail + +IRC, freenode, #hurd, 2011-09-05: + + mach isn't a true modern microkernel, it handles a lot of + resources, such as high level virtual memory and cpu time + for example, the page replacement mechanism can't be implemented + outside the kernel + yet, it provides nothing to userspace server to easily allocate + resources on behalf of clients + so, when a thread calls an RPC, the cpu time used to run that RPC + is accounted on the server task + the hurd uses lots of external memory managers + +[[external_pager_mechanism]]. + + but they can't decide how to interact with the page cache + the kernel handles the page cache, and initiates the requests to + the pagers + braunr, why can't they decide that? + because it's implemented in the kernel + and there is nothing provided by mach to do that some other way + braunr: you probably already know this, but the problem with client + requests being accounted on behalf the server, is fixed in Mach with + Migrating Threads + +[[open_issues/mach_migrating_threads]]. 
+ + slpz_: migrating threads only fix the issue for the resources + managed by mach, not the external servers + slpz_: but it's a (imo necessary) step to completely solve the + issue + in addition to being a great feature for performance (lighter + context switchers, less state to track) + it also helps priority inversion problems + braunr: I was referring just to cpu-time, but I agree with you an + interface change is needed for external pagers + slpz_: servers in general, not necessarily pagers + as a way to mitigate the effect of Mach paging out to external + pagers, the folks at OSF implemented an "advisory pageout", so servers + are "warned" that they should start paging out, and can decide which + pages are going to be flushed by themselves diff --git a/open_issues/clock_gettime.mdwn b/open_issues/clock_gettime.mdwn index c06edc9b..5345ed6b 100644 --- a/open_issues/clock_gettime.mdwn +++ b/open_issues/clock_gettime.mdwn @@ -39,3 +39,33 @@ IRC, freenode, #hurd, 2011-08-26: < youpi> yes, it should work < braunr> sure < youpi> and that's the way I was considering implementing it + +IRC, freenode, #hurd, 2011-09-06: + + yeah, i had a draft of improved idea for also handling + nanoseconds + pinotree: Ah, nice, I thought about nanoseconds as well. + pinotree, youpi: This memory page is all-zero by default, + right? + Can't we then say that its last int is a version code, and if + it is 0 (as it is now), we only have the normal mapped time field, if it + is 1, we also have the monotonic cliock and ns precision on address 8 and + 16 (or whatever)? + In case that isn't your plan anyway. + it's all-zero, yes + Or, we say if a field is != 0 it is valid. + making the last int a version code limits the size to one page + I was thinking a field != 0 being valid is simpler + but it's probably a problem too + in that glibc usually caches whether interfaces are supported + Wrap-around? 
+ for some clocks, it may be valid that the value is 0 + wrap-around is another issue too + Well, then we can do the version-field thing, but put it right + after the current time field (address 8, I think)? + yes + it's a bit ugly, but it's hidden behind the structure + It's not too bad, I think. + yes + And it will forever be a witness of the evolving of this + map_time interface. :-) diff --git a/open_issues/default_pager.mdwn b/open_issues/default_pager.mdwn new file mode 100644 index 00000000..189179c6 --- /dev/null +++ b/open_issues/default_pager.mdwn @@ -0,0 +1,28 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_gnumach]] + +IRC, freenode, #hurd, 2011-08-31: + + braunr: do you have any idea what could cause the paging errors + long before swap is exhausted? + antrik: not really, but i know every project based on the mach vm + have rewritten their swap pager + (and also I/O performance steadily dropping before that point is + reached?) + hm + there could too many things + perhaps we could "borrow" from one of them? 
:-) + map entry fragmentation for example + the freebsd one is the only possible candidate + uvm is too different + dragonflybsd maybe, but it's very close to freebsd + i didn't look at darwin/xnu diff --git a/open_issues/gnumach_memory_management.mdwn b/open_issues/gnumach_memory_management.mdwn index a728fc9d..1fe2f9be 100644 --- a/open_issues/gnumach_memory_management.mdwn +++ b/open_issues/gnumach_memory_management.mdwn @@ -1320,3 +1320,95 @@ There is a [[!FF_project 266]][[!tag bounty]] on this task. < braunr> i hope it helped you learn about memory allocation, virtual memory, gnu mach and the hurd in general :) < antrik> indeed :-) + + +# IRC, freenode, #hurd, 2011-09-06 + + [some performance testing] + i'm not sure such long tests are relevant but let's assume balloc + is slower + some tuning is needed here + first, we can see that slab allocation occurs more often in balloc + than page allocation does in zalloc + so yes, as slab allocation is slower (have you measured which part + actually is slow ? i guess it's the kmem_alloc call) + the whole process gets a bit slower too + I used alloc_size = 4096 for zalloc + i don't know what that is exactly + but you can't hold 500 16 bytes buffers in a page so zalloc must + have had free pages around for that + I use kmem_alloc_wired + if you have time, measure it, so that we know how much it accounts + for + where are the results for dealloc ? + I can't give you result right now because internet works very + bad. But for first DEALLOC result are the same, exept some cases when it + takes balloc for more than 1000 ticks + must be the transfer from the cpu layer to the slab layer + as to kmem_alloc_wired. I think zalloc uses this function too for + allocating objects in zone I test. + mcsim: yes, but less frequently, which is why it's faster + mcsim: another very important aspect that should be measured is + memory consumption, have you looked into that ? 
+ I think that I made too little iterations in test SMALL + If I increase constant SMALL_TESTS will it be good enough? + mcsim: i don't know, try both :) + if you increase the number of iterations, balloc average time will + be lower than zalloc, but this doesn't remove the first long + initialization step on the allocated slab + SMALL_TESTS to 500, I mean + i wonder if maintaining the slabs sorted through insertion sort is + what makes it slow + braunr: where do you sort slabs? I don't see this. + mcsim: mem_cache_alloc_from_slab and its free counterpart + mcsim: the mem_source stuff is useless in gnumach, you can remove + it and directly call the kmem_alloc/free functions + But I have to make special allocator for kernel map entries. + ah right + btw. It turned out that 256 entries are not enough. + that's weird + i'll make a patch so that the mem_source code looks more like what + i have in x15 then + about the results, i don't think the slab layer is that slow + it's the cpu_pool_fill/drain functions that take time + they preallocate many objects (64 for your objects size if i'm + right) at once + mcsim: look at the first result page: some times, a number around + 8000 is printed + the common time (ticks, whatever) for a single object is 120 + 8132/120 is 67, close enough to the 64 value + I forgot about SMALL tests here are they: + http://paste.debian.net/128533/ (balloc) http://paste.debian.net/128534/ + (zalloc) + braunr: why do you divide 8132 by 120? 
+ mcsim: to see if it matches my assumption that the ~8000 number + matches the cpu_pool_fill call + braunr: I've got it + mcsim: i'd be much interested in the dealloc results if you can + paste them too + dealloc: http://paste.debian.net/128589/ + http://paste.debian.net/128590/ + mcsim: thanks + second dealloc: http://paste.debian.net/128591/ + http://paste.debian.net/128592/ + mcsim: so the main conclusion i retain from your tests is that the + transfers from the cpu and the slab layers are what makes the new + allocator a bit slower + OPERATION_SMALL dealloc: http://paste.debian.net/128593/ + http://paste.debian.net/128594/ + mcsim: what needs to be measured now is global memory usage + braunr: data from /proc/vmstat after kernel compilation will be + enough? + mcsim: let me check + mcsim: no it won't do, you need to measure kernel memory usage + the best moment to measure it is right after zone_gc is called + Are there any facilities in gnumach for memory measurement? + it's specific to the allocators + just count the number of used pages + after garbage collection, there should be no free page, so this + should be rather simple + ok + braunr: When I measure memory usage in balloc, what formula is + better cache->nr_slabs * cache->bufs_per_slab * cache->buf_size or + cache->nr_slabs * cache->slab_size? 
+ the latter diff --git a/open_issues/mach_migrating_threads.mdwn b/open_issues/mach_migrating_threads.mdwn new file mode 100644 index 00000000..5a70aac5 --- /dev/null +++ b/open_issues/mach_migrating_threads.mdwn @@ -0,0 +1,15 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_gnumach]] + + + + * [[microkernel/mach/memory_object/discussion]] diff --git a/open_issues/performance.mdwn b/open_issues/performance.mdwn index 54f3ce39..2fd34621 100644 --- a/open_issues/performance.mdwn +++ b/open_issues/performance.mdwn @@ -30,3 +30,11 @@ call|/glibc/fork]]'s case. --- * [[Degradation]] + + * [[fork]] + + * [[IPC_virtual_copy]] + + * [[microbenchmarks]] + + * [[microkernel_multi-server]] diff --git a/open_issues/performance/degradation.mdwn b/open_issues/performance/degradation.mdwn index 5db82e31..db759308 100644 --- a/open_issues/performance/degradation.mdwn +++ b/open_issues/performance/degradation.mdwn @@ -18,7 +18,7 @@ Thomas Schwinge) > tree, reboot, build it again (1st): back to 11 h. Remove build tree, build > it again (2nd): 12 h 40 min. Remove build tree, build it again (3rd): 15 h. -IRC, freenode, #hurd, 2011-07-23 +IRC, freenode, #hurd, 2011-07-23: < antrik> tschwinge: yes, the system definitely gets slower with time. 
after running for a couple of weeks, it needs at least twice as @@ -26,3 +26,15 @@ IRC, freenode, #hurd, 2011-07-23 < antrik> I don't know whether this is only related to swap usage, or there are some serious fragmentation issues < braunr> antrik: both could be induced by fragmentation + +--- + +During [[IPC_virtual_copy]] testing: + +IRC, freenode, #hurd, 2011-09-02: + + interestingly, running it several times has made the performance + drop quite much (i'm getting 400-500MB/s with 1M now, compared to nearly + 800 fifteen minutes ago) + manuel: i observed the same behaviour + [...] diff --git a/open_issues/performance/ipc_virtual_copy.mdwn b/open_issues/performance/ipc_virtual_copy.mdwn new file mode 100644 index 00000000..00fa7180 --- /dev/null +++ b/open_issues/performance/ipc_virtual_copy.mdwn @@ -0,0 +1,358 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +IRC, freenode, #hurd, 2011-09-02: + + what's the usual throughput for I/O operations (like "dd + if=/dev/zero of=/dev/null") in one of those Xen based Hurd machines + (*bber)? + good question + slpz: but don't use /dev/zero and /dev/null, as they don't have + anything to do with true I/O operations + braunr: in fact, I want to test the performance of IPC's virtual + copy operations + ok + braunr: sorry, the "I/O" was misleading + use bs=4096 then i guess + bs > 2k + ? 
+ braunr: everything about 2k is copied by vm_map_copyin/copyout + s/about/above/ + braunr: MiG's stubs check for that value and generate complex (with + out_of_line memory) messages if datalen is above 2k, IIRC + ok + slpz: found it, thanks + tschwinge@strauss:~ $ dd if=/dev/zero of=/dev/null bs=4k & p=$! + && sleep 10 && kill -s INFO $p && sleep 1 && kill $p + [1] 13469 + 17091+0 records in + 17090+0 records out + 70000640 bytes (70 MB) copied, 17.1436 s, 4.1 MB/s + Note, however 10 s vs. 17 s! + And this is slow compared to real hardware: + thomas@coulomb:~ $ dd if=/dev/zero of=/dev/null bs=4k & p=$! && + sleep 10 && kill -s INFO $p && sleep 1 && kill $p + [1] 28290 + 93611+0 records in + 93610+0 records out + 383426560 bytes (383 MB) copied, 9.99 s, 38.4 MB/s + tschwinge: is the first result on xen vm ? + I think so. + :/ + tschwinge: Thanks! Could you please try with a higher block size, + something like 128k or 256k? + strauss is on a machine that also hosts a buildd, I think. + oh ok + yes, aside either rossini or mozart + And I can confirm that with dd if=/dev/zero of=/dev/null bs=4k + running, a parallel sleep 10 takes about 20 s (on strauss). + +[[open_issues/time]] + + slpz: i'll set up xen hosts soon and can try those tests while + nothing else runs to have more accurate results + tschwinge@strauss:~ $ dd if=/dev/zero of=/dev/null bs=256k & + p=$! && sleep 10 && kill -s INFO $p && sleep 1 && kill $p + [1] 13482 + 4566+0 records in + 4565+0 records out + 1196687360 bytes (1.2 GB) copied, 13.6751 s, 87.5 MB/s + slpz: gains are logarithmic beyond the page size + thomas@coulomb:~ $ dd if=/dev/zero of=/dev/null bs=256k & p=$! + && sleep 10 && kill -s INFO $p && sleep 1 && kill $p + [1] 28295 + 6335+0 records in + 6334+0 records out + 1660420096 bytes (1.7 GB) copied, 9.99 s, 166 MB/s + This time the sleep 10 decided to take 13.6 s. + ``Interesting.'' + tschwinge: Thanks again. The results for the Xen machine are not bad + though. 
I can't obtain a throughput over 50MB/s with KVM. + slpz: Want more data (bs)? Just tell. + slpz: i easily get more than that + slpz: what buffer size do you use ? + tschwinge: no, I just wanted to see if Xen has an upper limit beyond + KVM's. Thank you. + braunr: I try with different sizes until I find the maximum + throughput for a certain amount of requests (count) + braunr: are you working with KVM? + yes + slpz: my processor is a model name : Intel(R) Core(TM)2 Duo + CPU E7500 @ 2.93GHz + Linux silvermoon 2.6.32-5-amd64 #1 SMP Tue Jun 14 09:42:28 UTC + 2011 x86_64 GNU/Linux + (standard amd64 squeeze kernel) + braunr: and KVM's version? + squeeze (0.12.5) + bbl + 212467712 bytes (212 MB) copied, 9.95 s, 21.4 MB/s on kvm for me! + gnu_srs: which block size? + 4k, and 61.7 MB/s with 256k + gnu_srs: could you try with 512k and 1M? + 512k: 56.0 MB/s, 1024k: 40.2 MB/s Looks like the peak is around a + few 100k + gnu_srs: thanks! + I've just obtained 1.3GB/s with bs=512k on other (newer) machine + on which hw/vm ? + I knew this is a cpu-bound test, but I couldn't imagine faster + processors could make this difference + braunr: Intel(R) Core(TM) i5 CPU 650 @ 3.20GHz + braunr: KVM + ok + how much time did you wait before reading the result ? + that was 20x times better than the same test on my Intel(R) + Core(TM)2 Duo CPU T7500 @ 2.20GHz + braunr: I've repeated the test with a fixed "count" + My box is: Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz: Max + is 67 MB/s around 140k block size + yes but how much time did dd run ? + 10 s plus/minus a few fractions of a second, + try waiting 30s + braunr: didn't check, let me try again + my kvm peaks at 130 MiB/s with bs 512k / 1M + 2029690880 bytes (2.0 GB) copied, 30.02 s, 67.6 MB/s, bs=140k + gnu_srs: i'm very surprised with slpz's result of 1.3 GiB/s + braunr: over 60 s running, same performance + nice + i wonder what makes it so fast + how much cache ? 
+ Me too, I cannot get better values than around 67 MB/s + gnu_srs: same questions + braunr: 4096KB, same as my laptop + slpz: l2 ? l3 ? + kvm: cache=writeback, CPU: 4096 KB + gnu_srs: this has nothing to do with the qemu option, it's about + the cpu + braunr: no idea, it's the first time I touch this machine. I going + to see if I find the model in processorfinder + under my host linux system, i get a similar plot, that is, + performance drops beyond bs=1M + braunr: OK, bu I gave you the cache size too, same as slpz. + i wonder what dd actually does + read() and writes i guess + braunr: read/write repeatedly, nothing fancy + slpz: i don't think it's a good test for virtual copy + io_read_request, vm_deallocate, io_write_request, right + slpz: i really wonder what it is about i5 that improves speed so + much + braunr: me too + braunr: L2: 2x256KB, L3: 4MB + and something calling "SmartCache" + slpz: where did you find these values? + gnu_srs: ark.intel.com and wikipedia + aha, cpuinfo just gives cache size. + that "SmartCache" thing seems to be just L2 cache sharing between + cores. Shouldn't make a different since we're using only one core, and I + don't see KVM hooping between them. + with bs=256k: 7004487680 bytes (7.0 GB) copied, 10 s, 700 MB/s + (qemu/kvm, 3 * Intel(R) Xeon(R) E5504 2GHz, cache size 4096 KB) + manuel: did you try with 512k/1M? + bs=512k: 7730626560 bytes (7.7 GB) copied, 10 s, 773 MB/s + bs=1M: 7896825856 bytes (7.9 GB) copied, 10 s, 790 MB/s + manuel: those are pretty good numbers too + xeon processor + lshw gave me: L1 Cache 256KiB, L2 cache 4MiB + sincerely, I've never seen Hurd running this fast. 
Just checked + "uname -a" to make sure I didn't take the wrong image :-) + for bs=256k, 60s: 40582250496 bytes (41 GB) copied, 60 s, 676 MB/s + slpz: i think you can assume processor differences alter raw + copies too much to get any valuable results about virtual copy operations + you need a specialized test program + and bs=512k, 60s, 753 MB/s + braunr: I'm using the mach_perf suite from OSFMach to do the + "serious" testing. I just wanted a non-synthetic test to confirm the + readings. + +[[!taglink open_issue_gnumach]] -- have a look at *mach_perf*. + + manuel: how much cache ? 2M ? + slpz: ok + manuel: hmno, more i guess + braunr: /proc/cpuinfo says cache size : 4096 KB + ok + manuel: performance should drop beyond bs=2M + but that's not relevant anyway + Linux: bs=1M, 10.8 GB/s + I think this difference is too big to be only due to a bigger amount + of CPU cycles... + slpz: clearly + gnu_srs: your host system has 64 or 32 bits? + braunr: I'm going to investigate a bit + but this accidental discovery just made my day. We're able to run + Hurd at decent speeds on newer hardware! + slpz: what result do you get with the same test on your host + system ? + interestingly, running it several times has made the performance + drop quite much (i'm getting 400-500MB/s with 1M now, compared to nearly + 800 fifteen minutes ago) + +[[Degradation]]. 
+ + braunr: probably an almost infinite throughput, but I don't consider + that a valid test, since in Linux, the write operation to "/dev/null" + doesn't involve memory copying/moving + manuel: i observed the same behaviour + slpz: Host system is 64 bit + slpz: it doesn't on the hurd either + slpz: (under 2k, that is) + over* + braunr: humm, you're right, as the null translator doesn't "touch" + the memory, CoW rules apply + slpz: the only thing which actually copies things around is dd + probably by simply calling read() + which gets its result from a VM copy operation, but copies the + content to the caller provided buffer + then vm_deallocate() the data from the storeio (zero) translator + if storeio isn't too dumb, it doesn't even touch the transfered + buffer (as anonymous vm_map()ped memory is already cleared) + +[[!taglink open_issue_documentation]] + + so this is a good test for measuring (profiling?) our ipc overhead + and possibly the vm mapping operations (which could partly explain + why the results get worse over time) + manuel: can you run vminfo | wc -l on your gnumach process ? + braunr: Yes, unless some special situation apply, like the source + address/offset being unaligned, or if the translator decides to return + the result in a different buffer (which I assume is not the case for + storeio/zero) + braunr: 35 + slpz: they can't be unaligned, the vm code asserts that + manuel: ok, this is normal + braunr: address/offset from read() + slpz: the caller provided buffer you mean ? 
+ braunr: yes, and the offset of the memory_object, if it's a pager + based translator + slpz: highly unlikely, the compiler chooses appropriate alignments + for such buffers + braunr: in those cases, memcpy is used over vm_copy + slpz: and the glibc memcpy() optimized versions can usually deal + with that + slpz: i don't get your point about memory objects + slpz: requests on memory objects always have aligned values too + braunr: sure, but can't deal with the user requesting non + page-aligned sizes + slpz: we're considering our dd tests, for which we made sure sizes + were page aligned + braunr: oh, I was talking in a general sense, not just in this dd + tests, sorry + by the way, dd on the host tops at 12 GB/s with bs=2M + that's consistent with our other results + slpz: you mean, even on your i5 processor with 1.3 GiB/s on your + hurd kvm ? + braunr: yes, on the GNU/Linux which is running as host + slpz: well that's not consistent + braunr: consistent with what? + slpz: i get roughly the same result on my host, but ten times less + on my hurd kvm + slpz: what's your kernel/kvm versions ? + 2.6.32-5-amd64 (debian's build) 0.12.5 + same here + i'm a bit clueless + why do i only get 130 MiB/s where you get 1.3 .. ? :) + well, on my laptop, where Hurd on KVM tops on 50 MB/s, Linux gets a + bit more than 10 GB/s + see + slpz: reduce bs to 256k and test again if you have time please + braunr: on which system? + slpz: the fast one + (linux host) + braunr: Hurd? + ok + 12 GB/s + i get 13.3 + same for 128k, only at 64k starts dropping + maybe, on linux we're being limited by memory speed, while on Hurd's + this test is (much) more CPU-bound? + slpz: maybe + too bad processor stalls aren't easy to measure + braunr: that's very true. It's funny when you read a paper which + measures performance by cycles on an old RISC processor. 
That's almost + impossible to do (with reliability) nowadays :-/ + I wonder which throughput can achieve Hurd running bare-metal on + this machine... + both the Xeon and the i5 use cores based on the Nehalem + architecture + apparently Nehalem is where Intel first introduces nested page + tables + which pretty much explains the considerably lower overhead of VM + magic + antrik, what are nested page tables? (sounds like the 4-level page + tables we already have on amd64, or 2-level or 3-level on x86 pae) + page tables were always 2-level on x86 + that's unrelated + nested page tables means there is another layer of address + translation, so the VMM can do it's own translation and doesn't care what + the guest system does => no longer has to intercept all page table + manipulations + antrik: do you imply it only applies to virtualized systems ? + braunr: yes + antrik: Good guess. Looks like Intel's EPT are doing the trick by + allowing the guest OS deal with its own page faults + antrik: next monday, I'll try disabling EPT support in KVM on that + machine (the fast one). That should confirm your theory empirically. + this also means that there're too many page faults, as we should be + doing virtual copies of memory that is not being accessed + and looking at how the value of "page faults" in "vmstat" increases, + shows that page faults are directly proportional to the number of pages + we are asking from the translator + I've also tried doing a long read() directly, to be sure that "dd" + is not doing something weird, and it shows the same behaviour. + slpz: dd does copy buffers + slpz: i told you, it's not a good test case for pure virtual copy + evaluation + antrik: do you know if xen benefits from nested page tables ? + no idea + +[[!taglink open_issue_xen]] + + braunr: but my small program doesn't, and still provokes a lot of + page faults + slpz: are you certain it doesn't ? 
+ braunr: looking at google, it looks like recent Xen > 3.4 supports + EPT + ok + i'm ordering my new server right now, core i5 :) + braunr: at least not explicitily. I need to look at MiG stubs again, + I don't remember if they do something weird. + braunr: sandybridge or nehalem? :-) + antrik: no idea + does it tell a model number? + not yet + but i don't have a choice for that, so i'll order it first, check + after + hehe + I'm not sure it makes all that much difference anyways for a + server... unless you are running it at 100% load ;-) + antrik: i'm planning on running xen guests suchs as new buildd + hm... note though that some of the nehalem-generation i5s were + dual-core, while all the new ones are quad + it's a quad + the newer generation has better performance per GHz and per + Watt... but considering that we are rather I/O-limited in most cases, it + probably won't make much difference + not sure whether there are further virtualisation improvements + that could be relevant... + buildds spend much time running gcc, so even such improvements + should help + there, server ordered :) + antrik: model name : Intel(R) Core(TM) i5-2400 CPU @ 3.10GHz + +IRC, freenode, #hurd, 2011-09-06: + + youpi: what machines are being used for buildd? Do you know if they + have EPT/RVI? + we use PV Xen there + I think Xen could also take advantage of those technologies. Not + sure if only in HVM or with PV too. 
+ only in HVM + in PV it does not make sense: the guest already provides the + translated page table + which is just faster than anything else diff --git a/open_issues/time.mdwn b/open_issues/time.mdwn index eda5b635..ab239aef 100644 --- a/open_issues/time.mdwn +++ b/open_issues/time.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2009 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2009, 2011 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -53,3 +53,17 @@ GNU time's *elapsed* value is off by some factor. As above; also here all the running time should be attributed to *user* time. This is probably a [[!taglink open_issue_gnumach]]. + + +# 2011-09-02 + +Might want to revisit this, and take Xen [[!tag open_issue_xen]] into account +-- I believe flubber has already been Xenified at that time. + + +## IRC, freenode, #hurd, 2011-09-02 + +While testing some [[performance/IPC_virtual_copy]] performance issues: + + And I can confirm that with dd if=/dev/zero of=/dev/null bs=4k + running, a parallel sleep 10 takes about 20 s (on strauss). diff --git a/open_issues/translators_set_up_by_untrusted_users.mdwn b/open_issues/translators_set_up_by_untrusted_users.mdwn index cee7a2bc..36fe5438 100644 --- a/open_issues/translators_set_up_by_untrusted_users.mdwn +++ b/open_issues/translators_set_up_by_untrusted_users.mdwn @@ -281,3 +281,46 @@ Protection](https://wiki.ubuntu.com/SecurityTeam/Roadmap/KernelHardening#Symlink and [Hardlink Protection](https://wiki.ubuntu.com/SecurityTeam/Roadmap/KernelHardening#Hardlink_Protection) do bear some similarity with the issue we're discussing here. + + +# IRC, freenode, #hurd, 2011-08-31 + + I don't see any problems with following only translators of + trusted users + where to store the list of trusted users? 
+ is there a way to access the underlying node, which for /dev + entries belongs to root? + youpi: why a list of trusted users? Does it not suffice to + require /hurd/trust set by root or ourselves? + ArneBab: just because that's what antrik suggests, so I ask him for + more details + ah, ok + youpi: probably make them members of a group + of course that doesn't allow normal users to add their own trusted + users... but that's not the only limitation of the user-based + authentication mechanism, so I wouldn't consider that an extra problem + ArneBab: we can't set a translator on top of another user's + translator in general + root could, but that's not very flexible... + the group-based solution seems more useful to me + antrik: why can’t we? + also note that you can't set passive translators on top of other + translators + ArneBab: because we can only set translators on our own nodes + active ones, too? + yes + antrik: I always thought I could… + but did not test it + antrik: so I need a subhurd to change nodes which do not belong + to me? + * ArneBab in that case finally understands why you like subhurds so much: + That should be my normal right + it should be your normal right to change stuff not belonging to + you? that's an odd world view :-) + subhurds don't really have anything to do with it + change it in a way that only I see the changes + you need local namespaces to allow making local modifications to + global resources + it should be one's normal right to change the view one has of it + we discussed that once actually I believe... + err... private namespaces I mean -- cgit v1.2.3 From 219988e74ba30498a1c5d71cf557913a70ccca91 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Mon, 3 Oct 2011 20:49:54 +0200 Subject: IRC. 
--- faq/which_microkernel/discussion.mdwn | 61 ++++ hurd/translator/tmpfs/tmpfs_vs_defpager.mdwn | 115 ++++++- open_issues/code_analysis.mdwn | 32 +- open_issues/default_pager.mdwn | 3 + open_issues/gnumach_memory_management.mdwn | 365 +++++++++++++++++++++ open_issues/libmachuser_libhurduser_rpc_stubs.mdwn | 50 ++- open_issues/mach-defpager_vs_defpager.mdwn | 24 +- open_issues/mach_vm_pageout.mdwn | 19 ++ open_issues/osf_mach.mdwn | 237 +++++++++++++ open_issues/performance/degradation.mdwn | 16 +- .../io_system/clustered_page_faults.mdwn | 23 ++ open_issues/performance/ipc_virtual_copy.mdwn | 37 +++ open_issues/resource_management_problems.mdwn | 4 + .../resource_management_problems/pagers.mdwn | 322 ++++++++++++++++++ open_issues/rework_gnumach_ipc_spaces.mdwn | 2 +- .../translators_set_up_by_untrusted_users.mdwn | 21 ++ 16 files changed, 1298 insertions(+), 33 deletions(-) create mode 100644 open_issues/mach_vm_pageout.mdwn create mode 100644 open_issues/osf_mach.mdwn create mode 100644 open_issues/resource_management_problems/pagers.mdwn (limited to 'open_issues/performance') diff --git a/faq/which_microkernel/discussion.mdwn b/faq/which_microkernel/discussion.mdwn index 9ef3b915..7ea131e9 100644 --- a/faq/which_microkernel/discussion.mdwn +++ b/faq/which_microkernel/discussion.mdwn @@ -1,3 +1,20 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. 
A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_documentation]] + +[[!toc]] + + +# Olaf, 2011-04-10 + This version mixes up three distinct phases: rewrite from scratch; redesign; own microkernel. @@ -31,3 +48,47 @@ to the Coyotos port -- which after all is what the title promises... All in all, I still think my text was better. If you have any conerns with it, please discuss them... + + +# IRC, freenode, #hurd, 2011-09-27 + + Does anyone remember/know if/why not seL4 was considered for + hurd-l4? Is anyone aware of any differences between seL4 and coyotos? + + +## 2011-09-28 + + cjuner: the seL4 project was only at the beginning when the + decision was made. so was Coyotos, but Shapiro promised back then that + building on EROS, it would be done very fast (a promise he couldn't keep + BTW); plus he convinced the people in question that it's safer to build + on his ideas... + it doesn't really matter though, as by the time the ngHurd people + were through with Coyotos, they had already concluded that it doesn't + make sense to build upon *any* third-party microkernel + antrik, what was the problem with coyotos? what would be the + problem with sel4 today? + antrik, yes I did read the FAQ. It doesn't mention seL4 at all + (there isn't even much on the hurd-l4 mailing lists, I think that being + due to seL4 not having been released at that point?) and it does not + specify what problems they had with coyotos. + cjuner: it doesn't? I thought it mentioned "newer L4 variants" or + something like that... but the text was rewritten a couple of times, so I + guess it got lost somewhere + cjuner: unlike original L4, it's probably possible to implement a + system like the Hurd on top on seL4, just like on top of + Coyotos. however, foreign microkernels are always created with foreign + design ideas in mind; and building our own design around them is always + problematic. 
it's problematic with Mach, and it will be problematic with + any other third-party microkernel + Coyotos specifically has different ideas about memory protection, + different ideas about task startup, different ideas about memory + handling, and different ideas about resource allocation + antrik, do any specific problems of the foreign designs, + specifically of seL4 or coyotos come to mind? + cjuner: I mentioned several for Coyotos. I don't have enough + understanding of the matters to go into much more detail + (and I suspect you don't have enough understanding of these + matters to take away anything useful from more detail ;-) ) + I could try to explain the issues I mentioned for Coyotos (as far + as I understand them), but would that really help you? diff --git a/hurd/translator/tmpfs/tmpfs_vs_defpager.mdwn b/hurd/translator/tmpfs/tmpfs_vs_defpager.mdwn index f0eb473c..ecebe662 100644 --- a/hurd/translator/tmpfs/tmpfs_vs_defpager.mdwn +++ b/hurd/translator/tmpfs/tmpfs_vs_defpager.mdwn @@ -8,9 +8,10 @@ Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license is included in the section entitled [[GNU Free Documentation License|/fdl]]."]]"""]] -[[!tag open_issue_hurd]] +[[!tag open_issue_gnumach open_issue_hurd]] -\#hurd, freenode, 2010 + +# IRC, freenode, #hurd, 2010 humm... why does tmpfs try to use the default pager? that's a bad idea, and probably will never work correctly... @@ -120,3 +121,113 @@ License|/fdl]]."]]"""]] memory, gives them a reference to the default pager by calling vm_object_pager_create this is not really important, but worth noting ;-) + + +# IRC, freenode, #hurd, 2011-09-28 + + mcsim: "Fix tmpfs" task should be called "Fix default pager" :-) + mcsim: I've been thinking about modifying tmpfs to actually have + it's own storeio based backend, even if a tmpfs with storage sounds a bit + stupid. + mcsim: but I don't like the idea of having translators messing up + with the default pager... + slpz: messing up?... 
+ antrik: in the sense of creating a number of arbitrarily sized + objects + slpz: well, it doesn't really matter much whether a process + indirectly eats up arbitrary amounts of swap through tmpfs, or directly + through vm_allocate()... + though admittedly it's harder to implement resource limits with + tmpfs + antrik: but I've talked about having its own storeio device as + backend. This way Mach can pageout memory to tmpfs if it's needed. + Do I understand correctly that the goal of tmpfs task is to create + tmpfs in RAM? + mcsim: It is. But it also needs some kind of backend, just in case + it's ordered to page out data to free some system's memory. + mcsim: Nowadays, this backend is another translator that acts as + default pager for the whole system + slpz: pageout memory to tmpfs? not sure what you mean + antrik: I mean tmpfs acting as its own pager + slpz: you mean tmpfs not using the swap partition, but some other + backing store? + antrik: Yes. + +See also: [[open_issues/resource_management_problems/pagers]]. + + slpz: I don't think an extra backing store for tmpfs is a good + idea. the whole point of tmpfs is not having a backing store... TBH, I'd + even like to see a single backing store for anonymous memory and named + files + antrik: But you need a backing store, even if it's the default pager + :-) + antrik: The question is, Should users share the same backing store + (swap space) or provide their own? + slpz: not sure what you mean by "users" in this context :-) + antrik: Real users with the ability of setting tmpfs translators + essentially, I'd like to have a single partition that contains + both swap space and the main filesystem (at least /tmp, but probably also + all of /run, and possibly even /home...) 
+ but that's a bit off-topic :-) + well, ideally all storage should be accounted to a user, + regardless whether it's swapped out anonymous storage, temporary named + files, or permanent files + antrik: you could use a file as backend for tmpfs + slpz: what's the point of using tmpfs then? :-) + (and then store the file in another tmpfs) + antrik: mach-defpager could be modified to use storeio instead of + Mach's device_* operations, but by the way things work right now, that + could be dangerous, IMHO + pinotree: hehe + .. recursive tmpfs'es ;) + slpz: hm, sounds interesting + antrik: tmpfs would try to keep data in memory always it's possible + (not calling m_o_lock_request would do the trick), but if memory is + scarce an Mach starts paging out, it would write it to that + file/device/whatever + ideally, all storage used by system tasks for swapped out + anonymous memory as well as temporary named files would end up on the + /run partition; while all storage used by users would end up in /home/* + if users share a partition, some explicit storage accounting would + be useful too... + slpz: is that any different from what "normal" filesystems do?... + (and *should* it be different?...) + antrik: Yes, as most FS try to synchronize to disk at a reasonable + rate, to prevent data losses. + antrik: tmpfs would be a FS that wouldn't synchronize until it's + forced to do that (which, by the way, it's what's currently happening + with everyone that uses the default pager). + slpz: hm, good point... + antrik: Also, metadata in never written to disk, only kept in memory + (which saves a lot of I/O, too). + antrik: In fact, we would be doing the same as every other kernel + does, but doing it explicitly :-) + I see the use in separating precious data (in permanent named + files) from temporary state (anonymous memory and temporary named files) + -- but I'm not sure whether having a completely separate FS for the + temporary data is the right approach for that... 
+ antrik: And giving the user the option to specify its own storage, + so we don't limit him to the size established for swap by the super-user. + either way, that would be a rather radical change... still would + be good to fix tmpfs as it is first if possible + as for limited swap, that's precisely why I'd prefer not to have + an extra swap partition at all... + antrik: It's not much o fa change, it's how it works right now, with + the exception of replacing the default pager with its own. + antrik: I think it's just a matter of 10-20 hours, as + much. Including testing. + antrik: It could be forked with another name, though :-) + slpz: I don't mean radical change in the implementation... but a + radical change in the way it would be used + antrik: I suggest "almosttmpfs" as the name for the forked one :-P + hehe + how about lazyfs? + antrik: That sound good to me, but probably we should use a more + descriptive name :-) + + +## 2011-09-29 + + slpz, antrik: There is a defpager in the Hurd code. It is not + currently being used, and likely incomplete. It is backed by libstore. + I have never looked at it. diff --git a/open_issues/code_analysis.mdwn b/open_issues/code_analysis.mdwn index 552cd2c9..7495221b 100644 --- a/open_issues/code_analysis.mdwn +++ b/open_issues/code_analysis.mdwn @@ -19,7 +19,12 @@ analysis|performance]], [[formal_verification]], as well as general [[!toc]] -# Suggestions +# Bounty + +There is a [[!FF_project 276]][[!tag bounty]] on some of these tasks. + + +# Static * [[GCC]]'s warnings. Yes, really. @@ -52,8 +57,6 @@ analysis|performance]], [[formal_verification]], as well as general * - * [[community/gsoc/project_ideas/Valgrind]] - * [Smatch](http://smatch.sourceforge.net/) * [Parfait](http://labs.oracle.com/projects/parfait/) @@ -66,7 +69,12 @@ analysis|performance]], [[formal_verification]], as well as general * [sixgill](http://sixgill.org/) - * [Coverity](http://www.coverity.com/) -- commercial? 
+ * [Coverity](http://www.coverity.com/) (nonfree?) + + +# Dynamic + + * [[community/gsoc/project_ideas/Valgrind]] * @@ -76,7 +84,15 @@ analysis|performance]], [[formal_verification]], as well as general * - -# Bounty - -There is a [[!FF_project 276]][[!tag bounty]] on some of these tasks. + * IRC, freenode, #glibc, 2011-09-28 + + two things you can do -- there is an environment variable + (DEBUG_MALLOC_ iirc?) that can be set to 2 to make ptmalloc (glibc's + allocator) more forceful and verbose wrt error checking + another is to grab a copy of Tor's source tree and copy out + OpenBSD's allocator (its a clearly-identifyable file in the tree); + LD_PRELOAD it or link it into your app, it is even more aggressive + about detecting memory misuse. + third, Red hat has a gdb python plugin that can instrument + glibc's heap structure. its kinda handy, might help? + MALLOC_CHECK_ was the envvar you want, sorry. diff --git a/open_issues/default_pager.mdwn b/open_issues/default_pager.mdwn index 189179c6..18670c75 100644 --- a/open_issues/default_pager.mdwn +++ b/open_issues/default_pager.mdwn @@ -18,6 +18,9 @@ IRC, freenode, #hurd, 2011-08-31: have rewritten their swap pager (and also I/O performance steadily dropping before that point is reached?) + +[[performance/degradation]] (?). + hm there could too many things perhaps we could "borrow" from one of them? :-) diff --git a/open_issues/gnumach_memory_management.mdwn b/open_issues/gnumach_memory_management.mdwn index 1fe2f9be..fb3d6895 100644 --- a/open_issues/gnumach_memory_management.mdwn +++ b/open_issues/gnumach_memory_management.mdwn @@ -1412,3 +1412,368 @@ There is a [[!FF_project 266]][[!tag bounty]] on this task. better cache->nr_slabs * cache->bufs_per_slab * cache->buf_size or cache->nr_slabs * cache->slab_size? 
the latter + + +# IRC, freenode, #hurd, 2011-09-07 + + braunr: I've disabled calling of mem_cpu_pool_fill and allocator + became faster + mcsim: sounds nice + mcsim: i suspect the free path might not be as fast though + results for first calling: http://paste.debian.net/128639/ second: + http://paste.debian.net/128640/ and with many alloc/free: + http://paste.debian.net/128641/ + mcsim: thanks + best result are for second call: average time decreased from 159.56 + to 118.756 + First call slightly worse, but this is because I've added some + profiling code + i still see some ~8k lines in 128639 + even some around ~12k + I think this is because of mem_cache_grow I'm investigating it now + i guess so too + I've measured time for first call in cache and from about 22000 + mem_cache_grow takes 20000 + how did you change the code so that it doesn't call + mem_cpu_pool_fill ? + is the cpu layer still used ? + http://paste.debian.net/128644/ + don't forget the free path + mcsim: anyway, even with the previous slightly slower behaviour we + could observe, the performance hit is negligible + Is free path a compilation? (I'm sorry for my english) + mcsim: mem_cache_free + mcsim: the last two measurements i'd advise are with big (>4k) + object sizes and, really, kernel allocator consumption + http://paste.debian.net/128648/ http://paste.debian.net/128646/ + http://paste.debian.net/128649/ (first, second, small) + mcsim: these numbers are closer to the zalloc ones, aren't they ? + deallocating slighty faster too + it may not be the case with larger objects, because of the use of + a tree + yes, they are closer + but then, i expect some space gains + the whole thing is about compromise + ok. I'll try to measure them today. 
Anyway I'll post result and you + could read them in the morning + at least, it shows that the zone allocator was actually quite good + i don't like how the code looks, there are various hacks here and + there, it lacks self inspection features, but it's quite good + and there was little room for true improvement in this area, like + i told you :) + (my allocator, like the current x15 dev branch, focuses on mp + machines) + mcsim: thanks again for these numbers + i wouldn't have had the courage to make the tests myself before + some time eh + braunr: hello. Look at the small_4096 results + http://paste.debian.net/128692/ (balloc) http://paste.debian.net/128693/ + (zalloc) + mcsim: wow, what's that ? :) + mcsim: you should really really include your test parameters in + the report + like object size, purpose, and other similar details + for balloc I specified only object_size = 4096 + for zalloc object_size = 4096, alloc_size = 4096, memtype = 0; + the results are weird + apart from the very strange numbers (e.g. 0 or 4429543648), none + is around 3k, which is the value matching a kmem_alloc call + happy to see balloc behaves quite good for this size too + s/good/well/ + Oh + here is significant only first 101 lines + I'm sorry + ok + what does the test do again ? 10 loops of 10 allocs/frees ? + yes + ok, so the only slowdown is at the beginning, when the slabs are + created + the two big numbers (31844 and 19548) are strange + on the other hand time of compilation is + balloc zalloc + 38m28.290s 38m58.400s + 38m38.240s 38m42.140s + 38m30.410s 38m52.920s + what are you compiling ? + gnumach kernel + in 40 mins ? + yes + you lack hvm i guess + is it long? + I use real PC + very + ok + so it's normal + in vm it was about 2 hours) + the difference really is negligible + ok i can explain the big numbers + the slab size depends on the object size, and for 4k, it is 32k + you can store 8 4k buffers in a slab (lines 2 to 9) + so we need use kmem_alloc_* 8 times? 
+ on line 10, the ninth object is allocated, which adds another slab + to the cache, hence the big number + no, once for a size of 32k + and then the free list is initialized, which means accessing those + pages, which means tlb misses + i guess the zone allocator already has free pages available + I see + i think you can stop performance measurements, they show the + allocator is slightly slower, but so slightly we don't care about that + we need numbers on memory usage now (at the page level) + and this isn't easy + For balloc I can get numbers if I summarize nr_slabs*slab_size for + each cache, isn't it? + yes + you can have a look at the original implementation, function + mem_info + And for zalloc I have to summarize of cur_size and then add + zalloc_wasted_space? + i don't know :/ + i think the best moment to obtain accurate values is after zone_gc + removes the collected pages + for both allocators, you could fill a stats structure at that + moment, and have an rpc copy that structure when a client tool requests + it + concerning your tests, there is another point to have in mind + the very first loop in your code shows a result of 31844 + although you disabled the call to cpu_pool_fill + but the reason why it's so long is that the cpu layer still exists + and if you look carefully, the cpu pools are created as needed on + the free path + I removed cpu_pool_drain + but not cpu_pool_push/pop i guess + http://paste.debian.net/128698/ + see, you still allocate the cpu pool array on the free path + but I don't fill it + that's not the point + it uses mem_cache_alloc + so in a call to free, you can also have an allocation, that can + potentially create a new slab + I see, so I have to create cpu_pool at the initialization stage? 
+ no, you can't + there is a reason why they're allocated on the free path + but since you don't have the fill/drain functions, i wonder if you + should just comment out the whole cpu layer code + but hmm + no really, it's not worth the effort + even with drains/fills, the results are really good enough + it makes the allocator smp ready + we should just keep it that way + mcsim: fyi, the reason why cpu pool arrays are allocated on the + free path is to avoid recursion + because cpu pool arrays are allocated from caches just as almost + everything else + ok + summ of cur_size and then adding zalloc_wasted_space gives 0x4e1954 + but this value isn't even page aligned + For balloc I've got 0x4c6000 0x4aa000 0x48d000 + hm can you report them in decimal, >> 10 so that values are in KiB + ? + 4888 4776 4660 for balloc + 4998 for zalloc + when ? + after boot ? + boot, compile, zone_gc + and then measure + ? + I call garbage collector before measuring + and I measure after kernel compilation + i thought it took you 40 minutes + for balloc I got results at night + oh so you already got them + i can't beleive the kernel only consumes 5 MiB + before gc it takes about 9052 Kib + can i see the measurement code ? + oh, and how much ram does your machine have ? + 758 mb + 768 + that's really weird + i'd expect the kernel to consume much more space + http://paste.debian.net/128703/ + it's only dynamically allocated data + yes + ipc ports, rights, vm map entries, vm objects, and lots of other + hanging buffers + about how much is zalloc_wasted_space ? + if it's small or constant, i guess you could ignore it + about 492 + KiB + well it's another good point, mach internal structures don't imply + much overhead + or, the zone allocator is underused + + mcsim, braunr: The memory allocator project is coming along + good, as I get from your IRC messages? + tschwinge: yes, but as expected, improvements are minor + But at the very least it's now well-known, maintainable code. 
+ yes, it's readable, easier to understand, provides self inspection + and is smp ready + there also are less hacks, but a few less features (there are no + way to avoid sleeping so it's unusable - and unused - in interrupt + handlers) + is* no way + tschwinge: mcsim did a good job porting and measuring it + + +# IRC, freenode, #hurd, 2011-09-08 + + braunr: note that the zalloc map used to be limited to 8 MiB or + something like that a couple of years ago... so it doesn't seems + surprising that the kernel uses "only" 5 MiB :-) + (yes, we had a *lot* of zalloc panics back then...) + + +# IRC, freenode, #hurd, 2011-09-14 + + braunr: hello. I've written a constructor for kernel map entries + and it can return resources to their source. Can you have a look at it? + http://paste.debian.net/130037/ If all be OK I'll push it tomorrow. + mcsim: send the patch through mail please, i'll apply it on my + copy + are you sure the cache is reapable ? + All slabs, except first I allocate with kmem_alloc_wired. + how can you be sure ? + First slab I allocate during bootstrap and use pmap_steal_memory + and further I use only kmem_alloc_wired + no, you use kmem_free + in kentry_dealloc_cache() + which probably creates a recursion + using the constructor this way isn't a good idea + constructors are good for preconstructed state (set counters to 0, + init lists and locks, that kind of things, not allocating memory) + i don't think you should try to make this special cache reapable + mcsim: keep in mind constructors are applied on buffers at *slab* + creation, not at object allocation + so if you allocate a single slab with, say, 50 or 100 objects per + slab, kmem_alloc_wired would be called that number of times + why kentry_dealloc_cache can create recursion? kentry_dealloc_cache + is called only by mem_cache_reap. + right + but are you totally sure mem_cache_reap() can't be called by + kmem_free() ? 
+ i think you're right, it probably can't + + +# IRC, freenode, #hurd, 2011-09-25 + + braunr: hello. I rewrote constructor for kernel entries and seems + that it works fine. I think that this was last milestone. Only moving of + memory allocator sources to more appropriate place and merge with main + branch left. + mcsim: it needs renaming and reindenting too + for reindenting C-x h Tab in emacs will be enough? + mcsim: make sure which style must be used first + and what should I rename and where better to place allocator? For + example, there is no lib directory, like in x15. Should I create it and + move list.* and rbtree.* to lib/ or move these files to util/ or + something else? + mcsim: i told you balloc isn't a good name before, use something + more meaningful (kmem is already used in gnumach unfortunately if i'm + right) + you can put the support files in kern/ + what about vm_alloc? + you should prefix it with vm_ + shouldn't + it's a top level allocator + on top of the vm system + maybe mcache + hm no + maybe just km_ + kern/km_alloc.*? + no + just km + ok. + + +# IRC, freenode, #hurd, 2011-09-27 + + braunr: hello. When I've tried to speed of new allocator and bad + I've removed function mem_cpu_pool_fill. But you've said to undo this. I + don't understand why this function is necessary. Can you explain it, + please? + When I've tried to compare speed of new allocator and old* + i'm not sure i said that + i said the performance overhead is negligible + so it's better to leave the cpu pool layer in place, as it almost + doesn't hurt + you can implement the KMEM_CF_NO_CPU_POOL I added in the x15 mach + version + so that cpu pools aren't used by default, but the code is present + in case smp is implemented + I didn't remove cpu pool layer. I've just removed filling of cpu + pool during creation of slab. + how do you fill the cpu pools then ? + If object is freed than it is added to cpu poll + so you don't fill/drain the pools ? 
+ you try to get/put an object and if it fails you directly fall + back to the slab layer ? + I drain them during garbage collection + oh + yes + you shouldn't touch the cpu layer during gc + the number of objects should be small enough so that we don't care + much + ok. I can drain cpu pool at any other time if it is prohibited to + in mem_gc. + But why do we need to fill cpu poll during slab creation? + In this case allocation consist of: get object from slab -> put it + to cpu pool -> get it from cpu pool + I've just remove last to stages + hm cpu pools aren't filled at slab creation + they're filled when they're empty, and drained when they're full + so that the number of objects they contain is increased/reduced to + a value suitable for the next allocations/frees + the idea is to fall back as little as possible to the slab layer + because it requires the acquisition of the cache lock + oh. You're right. I'm really sorry. The point is that if cpu pool + is empty we don't need to fill it first + uh, yes we do :) + Why cache locking is so undesirable? If we have free objects in + slabs locking will not take a lot if time. + mcsim: it's undesirable on a smp system + ok. + mcsim: and spin locks are normally noops on a up system + which is the case in gnumach, hence the slightly better + performances without the cpu layer + but i designed this allocator for x15, which only supports mp + systems :) + mcsim: sorry i couldn't look at your code, sick first, busy with + server migration now (new server almost ready for xen hurds :)) + ok. + I ended with allocator if didn't miss anything important:) + i'll have a look soon i hope :) + + +# IRC, freenode, #hurd, 2011-09-27 + + braunr: would it be realistic/useful to check during GC whether + all "used" objects are actually in a CPU pool, and if so, destroy them so + the slab can be freed?... + mcsim: BTW, did you ever do any measurements of memory + use/fragmentation? + antrik: I couldn't do this for zalloc + oh... 
why not? + (BTW, I would be interested in a comparision between using the CPU + layer, and bare slab allocation without CPU layer) + Result I've got were strange. It wasn't even aligned to page size. + Probably is it better to look into /proc/vmstat? + Because I put hooks in the code and probably I missed something + mcsim: I doubt vmstat would give enough information to make any + useful comparision... + antrik: isn't this draining cpu pools at gc time ? + antrik: the cpu layer was found to add a slight overhead compared + to always falling back to the slab layer + braunr: my idea is only to drop entries from the CPU cache if they + actually prevent slabs from being freed... if other objects in the slab + are really in use, there is no point in flushing them from the CPU cache + braunr: I meant comparing the fragmentation with/without CPU + layer. the difference in CPU usage is probably negligable anyways... + you might remember that I was (and still am) sceptical about CPU + layer, as I suspect it worsens the good fragmentation properties of the + pure slab allocator -- but it would be nice to actually check this :-) + antrik: right + antrik: the more i think about it, the more i consider slqb to be + a better solution ...... :> + an idea for when there's time + eh + hehe :-) diff --git a/open_issues/libmachuser_libhurduser_rpc_stubs.mdwn b/open_issues/libmachuser_libhurduser_rpc_stubs.mdwn index d069641e..93055b77 100644 --- a/open_issues/libmachuser_libhurduser_rpc_stubs.mdwn +++ b/open_issues/libmachuser_libhurduser_rpc_stubs.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2011 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -8,19 +8,49 @@ Sections, no Front-Cover Texts, and no Back-Cover Texts. 
A copy of the license is included in the section entitled [[GNU Free Documentation License|/fdl]]."]]"""]] -bug-hurd discussion. +[[!tag open_issue_glibc open_issue_hurd]] ---- +[[!toc]] -IRC, #hurd, 2010-08-12 - Looking at hurd.git, shouldn't {hurd,include}/Makefile's "all" target do something, and shouldn't pretty much everything depend on them? As it stands it seems that the system headers are used and the potentially newer ones never get built, except maybe on "install" (which is seemingly never called from the top-level Makefile) - I would fix it, but something tells me that maybe it's a feature :-) +# bug-hurd discussion. + + +# IRC, freenode, #hurd, 2010-08-12 + + Looking at hurd.git, shouldn't {hurd,include}/Makefile's "all" + target do something, and shouldn't pretty much everything depend on them? + As it stands it seems that the system headers are used and the + potentially newer ones never get built, except maybe on "install" (which + is seemingly never called from the top-level Makefile) + I would fix it, but something tells me that maybe it's a feature + :-) jkoenig: the headers are provided by glibc, along with the stubs - antrik, you mean, even those built from the .defs files in hurd/ ? + antrik, you mean, even those built from the .defs files in hurd/ + ? yes oh, ok then. - as glibc provides the stubs (in libhurduser), the headers also have to come from there, or they would get out of sync - hmm, shouldn't glibc also provide /usr/share/msgids/hurd.msgids, then? - jkoenig: not necessarily. the msgids describe what the servers actually understand. if the stubs are missing from libhurduser, that's no reason to leave out the msgids... + as glibc provides the stubs (in libhurduser), the headers also + have to come from there, or they would get out of sync + hmm, shouldn't glibc also provide /usr/share/msgids/hurd.msgids, + then? + jkoenig: not necessarily. the msgids describe what the servers + actually understand. 
if the stubs are missing from libhurduser, that's no + reason to leave out the msgids... ok this makes sense + + +# IRC, OFTC, #debian-hurd, 2011-09-29 + + pinotree: I don't like their existence. IMO (but I haven't + researched this in very much detail), every user of RPC stubs should + generated them for themselves (and glibc should directly include the + stubs it uses internally). + sounds fair + maybe they could be moved from glibc to hurd? + pinotree: Yeah; someone needs to research why we have them (or + if it's only convenience), and whether we want to keep them. + you could move them to hurd, leaving them unaltered, so binary + compatibility with eventual 3rd party users is not broken + but those using them, other than hurd itself, won't compile + anymore, so you fix them progressively diff --git a/open_issues/mach-defpager_vs_defpager.mdwn b/open_issues/mach-defpager_vs_defpager.mdwn index d6976706..f03bc67f 100644 --- a/open_issues/mach-defpager_vs_defpager.mdwn +++ b/open_issues/mach-defpager_vs_defpager.mdwn @@ -1,4 +1,4 @@ -[[!meta copyright="Copyright © 2010 Free Software Foundation, Inc."]] +[[!meta copyright="Copyright © 2010, 2011 Free Software Foundation, Inc."]] [[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable id="license" text="Permission is granted to copy, distribute and/or modify this @@ -10,16 +10,24 @@ License|/fdl]]."]]"""]] [[!tag open_issue_gnumach open_issue_hurd]] -\#hurd, 2010, end of May / beginning of June +IRC, freenode, #hurd, end of May/beginning of June 2010 whats the difference between mach-defpager and defpager? 
- i'm guessing defpager is a hurdish version that uses libstore but was never finished or something - found an interesting thread about it: http://mirror.libre.fm/hurd/list/msg01232.html + i'm guessing defpager is a hurdish version that uses libstore + but was never finished or something + found an interesting thread about it: + http://mirror.libre.fm/hurd/list/msg01232.html antrik: an interesting thread, indeed :-) - slpz: btw is mach-defpager linked statically but not called mach-defpager.static on purpose? - antrik: also, I can confirm that mach-defpager needs a complete rewrite ;-) + slpz: btw is mach-defpager linked statically but not called + mach-defpager.static on purpose? + antrik: also, I can confirm that mach-defpager needs a complete + rewrite ;-) pochu: I think the original defpager was launched by serverboot pochu: that could be the reason to have it static, like ext2fs - and since there's no need to execute it again during the normal operation of the system, they probably decided to not create a dynamically linked version + and since there's no need to execute it again during the normal + operation of the system, they probably decided to not create a + dynamically linked version (but I'm just guessing) - of perhaps they wanted to prevent mach-defpager from the need of reading libraries, since it's used when memory is really scarce (guessing again) + of perhaps they wanted to prevent mach-defpager from the need of + reading libraries, since it's used when memory is really scarce (guessing + again) diff --git a/open_issues/mach_vm_pageout.mdwn b/open_issues/mach_vm_pageout.mdwn new file mode 100644 index 00000000..dac7fe28 --- /dev/null +++ b/open_issues/mach_vm_pageout.mdwn @@ -0,0 +1,19 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU 
Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_gnumach]] + +IRC, freenode, #hurd, 2011-09-09 + + It's amazing how broken some parts of Mach's VM are + currently, it doesn't even keep track of the number of external + pages in the lists + and vm_pageout_scan produces a hang if want_pages == FALSE (which + never is, because vm_page_external_count is always 0) diff --git a/open_issues/osf_mach.mdwn b/open_issues/osf_mach.mdwn new file mode 100644 index 00000000..d689bfcb --- /dev/null +++ b/open_issues/osf_mach.mdwn @@ -0,0 +1,237 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_glibc open_issue_gnumach open_issue_hurd]] + +IRC, freenode, #hurd, 2011-09-07 + + tschwinge: do you think that should be possible/convenient to + maintain hurd and glibc versions for OSF Mach as branches in the offical + git repo? + Is OSF Mach the MkLinux one? + Yes, it is + slpz: If there's a suitable license, then yes, of course! + Unless there is a proper upstream, of course. + But I don't assume there is? + slpz: What is interesting for us about OSF Mach? + tschwinge: Peter Bruin and Jose Marchesi did a gnuified version some + time ago (gnu-osfmach), so I suppose the license is not a problem. 
But + I'm going to check it, though + OSF Mach has a number of interesting features + like migrating threads, advisory pageout, clustered pageout, kernel + loaded tasks, short circuited RPC... + Oh! + Good. + right now I'm testing if it's really worth the effort + Yes. + But if the core codebase is the same (is it?) it may be + possible to merge some things? + If the changes can be identified reasonably... + comparing performance of the specialized RPC of OSF Mach with + generic IPC + That was my first intention, but I think that porting all those + features will be much more work than porting Hurd/glibc to it + slpz: ipc performance currently matters less than clustered + pageouts + slpz: i'm really not sure .. + i'd personnally adapt the kernel + braunr: well, clustered pageouts is one of the changes that can be + easily ported + braunr: We can consider OSF Mach code as reasonably stable, and + porting its features to GNU Mach will take us to the point of having to + debug all that code again + probably, the hardest feature to be ported is migrating threads + isn't that what was tried for gnu mach 2 ? or was it only about + oskit ? + IIRC only oskit + slpz: But there have been some advancements in GNU Mach, too. + For example the Xen port. + But wen can experiment with it, of course. + tschwinge: I find easier to move the Xen support from GNU Mach to + OSF Mach, than porting MT in the other direction + slpz: And I think MkLinux is a single-server, so I don't this + they used IPC as much as we did? + slpz: OK, I see. + slpz: MT aren't as needed as clustered pageouts :p + gnumach already has ipc handoff, so MT would just consume less + stack space, and only slightly improve raw ipc performance + slpz: But we will surely accept patches that get the Hurd/glibc + ported to OSF Mach, no question. 
+ (it's required for other issues we discussed already, but not a + priority imo) + tschwinge: MkLinux makes heavy use of IPC, but it tries to + "short-circuit" it when running as a kernel loaded task + And it's obviously best to keep it in one place. Luckily it's + not CVS branches anymore... :-) + braunr: well, I'm a bit obsessed with IPC peformance, if the RPC on + OSF Mach really makes a difference, I want it for Hurd right now + braunr: clustered pages can be implemented at any time :-) + tschwinge: great! + slpz: In fact, haven'T there already been some Savannah + repositories created, several (five?) years ago? + slpz: the biggest performance issue on the hurd is I/O + and the easiest way to improve that is better VM transfers + tschwinge: yes, the HARD project, but I think it wasn't too well + received... + slpz: Quite some things changed since then, I'd say. + braunr: I agree, but IPC is the hardest part to optimize + braunr: If we have a fast IPC, the rest of improvements are way + easier + slpz: i don't see how faster IPC makes I/O faster :( + slpz: read + http://www.sceen.net/~rbraun/the_increasing_irrelevance_of_ipc_performance_for_microkernel_based_operating_systems.pdf + again :) + braunr: IPC puts the upper limit of how fast I/O could be + the abstract for my thesis on x15 mach was that the ipc code was + the most focused part of the kernel + so my approach was to optimize everything *else* + the improvements in UVM (and most notably clustered page + transfers) show global system improvements up to 30% in netbsd + we should really focus on the VM first (which btw, is a pain in + the ass with the crappy panicking swap code in place) + and then complete the I/O system + braunr: If a system can't transfer data between translators faster + than 100 MB/s, faster devices doesn't make much sense + has anyone considered switching the syscalls to use + sysenter/syscall instead of soft interrupts? 
+ braunr: but I agree on the VM part + guillem: it's in my thesis .. but only there :) + slpz: let's reach 100 MiB/s first, then improve IPC + guillem: that's a must do, also moving to 64 bits :-) + guillem: there are many tiny observations in it, like the use of + global page table entries, which was added by youpi around that time + slpz: I wanted to fix all warnings first before sending my first + batch of 64 bit fixes, but I think I'll just send them after checking + they don't introduce regressions on i386 + braunr: interesting I think I might have skimmed over your + thesis, maybe I should read it properly some time :) + braunr: I see exactly as the opposite. First push IPC to its limit, + then improve devices/VM + guillem: that's great :-) + slpz: improving ipc now will bring *nothing*, whereas improving + vm/io now will make the system considerably more useable + but then fixing 64-bit issues in the Linux code is pretty + annoying given that the latest code from upstream has that already fixed, + and we are “supposed” to drop the linux code from gnumach at some point + :) + slpz: that's a basic principle in profiling, improve what brings + the best gains + braunr: I'm not thinking about today, I'm thinking about how fast + Hurd could be when running on Mach. And, as I said, IPC is the absolute + upper limit. 
+ i'm really not convinced + there are that many tasks making extensive use of IPCs + most are cpu/IO bound + but I have to acknowledge that this concern has been really + aliviated by the EPT improvement discovery + there aren't* that many tasks + braunr: create a ramdisk an write some files on it + braunr: there's no I/O in that case, an performance it's really low + too + well, ramdisks don't even work correctly iirc + I must say that I consider improvements in OOL data moving as if it + were in IPC itself + braunr: you can simulate one with storeio + slpz: then measure what's slow + slpz: it couldn't simply be the vm layer + braunr: + http://www.gnu.org/s/hurd/hurd/libstore/examples/ramdisk.html + ok, it's not a true ramdisk + it's a stack of a ramdisk and extfs servers + ext2fs* + i was thinking about tmpfs + True, but one of Hurd main advantages is the ability of doing that + kind of things + so they must work with a reasonable performance + other systems can too .. + anyway + i get your point, you want faster IPCs, like everyone does + braunr: yes, and I also want to know how fast could be, to have a + reference when profiling complex services + slpz: really improving IPC performance probably requires changing + the semantics... but we don't know which semantics we want until we have + actually tried fixing the existing bottlenecks + well, not only bottlenecks... also other issues such as resource + management + antrik: I think fixing bottlenecks would probably require changes in + some Mach interfaces, not in the IPC subsystem + antrik: I mean, IPC semantics just provide the basis for messaging, + I don't think we will need to change them further + slpz: right, but only once we have addressed the bottlenecks (and + other major shortcomings), we will know how the IPC mechanisms needs to + change to get further improvements... 
+ of course improving Mach IPC performance is interesting too -- if + nothing else, then to see how much of a difference it really makes... I + just don't think it should be considered an overriding priority :-) + slpz: I agree with braunr, I don't think improving IPC will bring + much on the short term + the buildds are slow mostly because of bad VM + like lack of read-ahead, the randomness of object cache pageout, + etc. + that doesn't mean IPC shouldn't be improved of course + but we have a big margin for iow + s/iow/now + youpi: I agree with you and with braunr in that regard. I'm not + looking for an inmediate improvement, I just want to see how fast the IPC + (specially, OOL data transfers) could be. + also, migrating threads will help to fix some problems related with + resource management + slpz: BTW, what about Apple's Mach? isn't it essentialy OSF Mach + with some further improvements?... + antrik: IPC is an area with very little room for improvement, so I + don't we will fix that bottlenecks by applying some changes there + well, for large OOL transfers, the limiting facter is certainly + also VM rather than the thread model?... + antrik: yes, but I think is encumbered with the APPLv2 license + ugh + antrik: for OOL transfers, VM plays a big role, but IPC also has + great deal of responsibility + as for resource management, migrating threads do not really help + much IMHO, as they only affect CPU scheduling. memory usage is a much + more pressing issue + BTW, I have thought about passive objects in the past, but didn't + reach any conclusion... 
so I'm a bit ambivalent about migrating threads + :-) + As an example, in Hurd on GNU Mach, an io_read can't take advantage + from copy-on-write, as buffers from the translator always arrive outside + user's buffer + antrik: well, I think cpu scheduling is a big deal ;-) + antrik: and for memory management, until a better design is + implemented, some fixes could be applied to get us to the same level as a + monolithic kernel + to get even close to monolithic systems, we need either a way to + account server resources used on client's behalf, or to make servers use + client-provided resources. both require changes in the IPC mechanism I + think... + (though *if* we go for the latter option, the CPU scheduling + changes of migrating threads would of course be necessary, in addition to + any changes regarding memory management...) + slpz: BTW, I didn't get the point about io_read and COW... + antrik: AFAIK, the FS cache (which is our primary concern) in most + monolithic system is agnostic with respect the users, and only deals with + absolute numbers. In our case we can do almost the same by combining Mach + and pagers knowledege. + slpz: my primary concern is that anything program having a hiccup + crashes the system... and I'm not sure this can be properly fixed without + working memory accounting + (I guess in can be worked around to some extent by introducing + various static limits on processes... but I'm not sure how well) + it can + antrik: monolithic system also suffer that problem (remember fork + bombs) and it's "solved" by imposing static limits to user processes + (ulimit). + antrik: we do have more problems due to port management, but I think + some degree of control can be archieved with a reasonably amount of + changes. + slpz: in a client-server architecture static limits are much less + effective... that problem exists on traditional systems too, but only in + some specific cases (such as X server); while on a microkernel system + it's ubiquitous... 
that's why we need a *better* solution to this problem + to get anywhere close to monolithic systems diff --git a/open_issues/performance/degradation.mdwn b/open_issues/performance/degradation.mdwn index db759308..8c9a087c 100644 --- a/open_issues/performance/degradation.mdwn +++ b/open_issues/performance/degradation.mdwn @@ -10,8 +10,12 @@ License|/fdl]]."]]"""]] [[!meta title="Degradation of GNU/Hurd ``system performance''"]] -Email, *id:"87mxg2ahh8.fsf@kepler.schwinge.homeip.net"* (bug-hurd, 2011-07-25, -Thomas Schwinge) +[[!tag open_issue_gnumach open_issue_hurd]] + +[[!toc]] + + +# Email, `id:"87mxg2ahh8.fsf@kepler.schwinge.homeip.net"` (bug-hurd, 2011-07-25, Thomas Schwinge) > Building a certain GCC configuration on a freshly booted system: 11 h. > Remove build tree, build it again (2nd): 12 h 50 min. Huh. Remove build @@ -27,9 +31,8 @@ IRC, freenode, #hurd, 2011-07-23: are some serious fragmentation issues < braunr> antrik: both could be induced by fragmentation ---- -During [[IPC_virtual_copy]] testing: +# During [[IPC_virtual_copy]] testing IRC, freenode, #hurd, 2011-09-02: @@ -38,3 +41,8 @@ IRC, freenode, #hurd, 2011-09-02: 800 fifteen minutes ago) manuel: i observed the same behaviour [...] + + +# IRC, freenode, #hurd, 2011-09-22 + +See [[/open_issues/pagers]], IRC, freenode, #hurd, 2011-09-22. diff --git a/open_issues/performance/io_system/clustered_page_faults.mdwn b/open_issues/performance/io_system/clustered_page_faults.mdwn index 9e20f8e1..a3baf30d 100644 --- a/open_issues/performance/io_system/clustered_page_faults.mdwn +++ b/open_issues/performance/io_system/clustered_page_faults.mdwn @@ -137,3 +137,26 @@ License|/fdl]]."]]"""]] where the pager interface needs to be modified, not the Mach one?... antrik: would be nice wouldn't it ? 
:) antrik: more probably the page fault handler + + +# IRC, freenode, #hurd, 2011-09-28 + + antrik: I've just recovered part of my old multipage I/O work + antrik: I intend to clean and submit it after finishing the changes + to the pageout system. + slpz: oh, great! + didn't know you worked on multipage I/O + slpz: BTW, have you checked whether any of the work done for GSoC + last year is any good?... + (apart from missing copyright assignments, which would be a + serious problem for the Hurd parts...) + antrik: It was seven years ago, but I did: + http://www.mail-archive.com/bug-hurd@gnu.org/msg10285.html :-) + antrik: Sincerely, I don't think the quality of that code is good + enough to be considered... but I think it was my fault as his mentor for + not correcting him soon enough... + slpz: I see + TBH, I feel guilty myself, for not asking about the situation + immediately when he stopped attending meetings... + slpz: oh, you even already looked into vm_pageout_scan() back then + :-) diff --git a/open_issues/performance/ipc_virtual_copy.mdwn b/open_issues/performance/ipc_virtual_copy.mdwn index 00fa7180..9708ab96 100644 --- a/open_issues/performance/ipc_virtual_copy.mdwn +++ b/open_issues/performance/ipc_virtual_copy.mdwn @@ -356,3 +356,40 @@ IRC, freenode, #hurd, 2011-09-06: in PV it does not make sense: the guest already provides the translated page table which is just faster than anything else + +IRC, freenode, #hurd, 2011-09-09: + + oh BTW, for another data point: dd zero->null gets around 225 MB/s + on my lowly 1 GHz Pentium3, with a blocksize of 32k + (but only half of that with 256k blocksize, and even less with 1M) + the system has been up for a while... don't know whether it's + faster on a freshly booted one + +IRC, freenode, #hurd, 2011-09-15: + + + http://www.reddit.com/r/gnu/comments/k68mb/how_intelamd_inadvertently_fixed_gnu_hurd/ + so is the dd command pointed to by that article a measure of io + performance? 
+ sudoman: no, not really + it's basically the baseline of what is possible -- but the actual + slowness we experience is more due to very unoptimal disk access patterns + though using KVM with writeback caching does actually help with + that... + also note that the title of this post really makes no + sense... nested page tables should provide similar improvements for *any* + guest system doing VM manipulation -- it's not Hurd-specific at all + ok, that makes sense. thanks :) + +IRC, freenode, #hurd, 2011-09-16: + + antrik: I wrote that article (the one about How AMD/Intel fixed...) + antrik: It's obviously a bit of an exaggeration, but it's true that + nested pages supposes a great improvement in the performance of Hurd + running on virtual machines + antrik: and it's Hurd specific, as this system is more affected by + the cost of page faults + antrik: and as the impact of virtualization on the performance is + much higher than (almost) any other OS. + antrik: also, dd from /dev/zero to /dev/null it's a measure on how + fast OOL IPC is. 
diff --git a/open_issues/resource_management_problems.mdwn b/open_issues/resource_management_problems.mdwn index 1558bebb..8f752d61 100644 --- a/open_issues/resource_management_problems.mdwn +++ b/open_issues/resource_management_problems.mdwn @@ -77,6 +77,10 @@ IRC, freenode, #hurd, 2011-07-31 # Further Examples + * [[hurd/critique]] + * [[IO_accounting]] + * [[translators_set_up_by_untrusted_users]], and [[pagers]] + * [[configure max command line length]] diff --git a/open_issues/resource_management_problems/pagers.mdwn b/open_issues/resource_management_problems/pagers.mdwn new file mode 100644 index 00000000..4c36703c --- /dev/null +++ b/open_issues/resource_management_problems/pagers.mdwn @@ -0,0 +1,322 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!tag open_issue_gnumach]] + +[[!toc]] + + +# IRC, freenode, #hurd, 2011-09-14 + +Coming from [[translators_set_up_by_untrusted_users]], 2011-09-14 discussion: + + antrik: I think a tunable option for preventing non-root users from + creating pagers and attaching translators could also be desirable + slpz: why would you want to prevent creating pagers and attaching + translators? + Preventing resource exhaustion, I guess. + antrik: security and (as tschwinge says) for prevent a rouge pager + from exhausting the system. 
+ antrik: without the ability to use translators for non-root users, + Hurd can provide (almost) the same level of resource protection than + other *nixes + +See also: [[translators_set_up_by_untrusted_users]], +[[hurd/translator/tmpfs/tmpfs_vs_defpager]]. + + the hurd is about that though + there should be also a limit on the number of outstanding requests + that a task can have, and some other easily traceable values + port messages queues have limits + slpz: anything can exhaust the system. there are much more basic + limits that are missing... and I don't see how translators or pagers are + special in that regard + braunr: that's what I said tunable. If I don't share my computer + with untrusted users, I want full functionality. Otherwise, I can enable + that limitation + braunr: but I think those limits are on reception + that's a wrong solution + antrik: because pagers are external memory objects, and those are + treated differently + compared to what ? + and yes, the limit is on the message queue, on reception + why is that a problem ? 
+ antrik: forbidding the use of translator was for security, to avoid + the problem of traversing an untrusted FS + braunr: compared to anonymous memory + braunr: because if the limit is on reception, a task can easily do a + DoS against a server + hm actually, the problems we have with swap handling is that + anonymous memory is handled in a very similar way as other objects + braunr: I want to limit the number of outstanding (unprocessed + messages in queues) requests + slpz: the solution isn't about forbidding the use of translators, + but changing common code (libc i guess) not to use them, they can still + run beside + braunr: that's because, currently, the external page limit is not + enforced + i'm also not sure about DoS attacks + if i'm right, there is often one port for each managed object, + which usually exist per client + braunr: yes, that could an option too (for translators, not for + pagers) + i don't see how pagers wouldn't be translators on the hurd + braunr: all pagers are translators, but not all translators are + pagers ;-) + so if it works for translators, it also works for pagers + braunr: it would fix the security issue, but not the resource + exhaustion problem, with only affects to pagers + i just don't see a point in implementing resource limits before + even fixing other fundamental issues + the only way to avoid resource exhaustion is resource limits + slpz: just not following untrusted translators is much more useful + than forbidding them alltogether + and the main problem of mach is resource accounting + so first, fix that, using the critique as a starting point + +[[hurd/critique]]. 
+ + braunr: i'm not saying that this should be implemented right now, + i'm just pointing out this possibility + i think we're all mostly aware of it + braunr: resource accounting, as it's expressed in the critique, + would be wonderful, but it's just too complex IMHO + it requires carefully designed changes to the interface yes + to the interface, to the internals, to user space tasks... + the internals wouldn't be impacted that much + user space tasks would mostly include hurd servers + if the changes are centralized in libraries, it should be easy to + provide to the servers + + +# IRC, freenode, #hurd, 2011-09-22 + + antrik: I've also implemented a simple resource control on dirty + pages and changed pageout_scan to free external pages, and only touch + anonymous memory if it's really needed + antrik: those combined make the system work better under heavy load + antrik: 1.5 GB of RAM and another 1.5 GB of swap helps a lot, too + :-) + hm... I'm not sure what these things mean exactly TBH... but I + wonder whether some of these could fix the performance degradation (and + ultimate crash) I described recently... + +[[/open_issues/default_pager]], [[system performance degradation +(?)|performance/degradation]]. + + care to explain them to a noob like me? + probably not. During my tests, I've noticed that, at some points, + the system performance starts to degrade, and this doesn't change until + it's restarted + but I wasn't able to create a test case to reproduce the bug... + antrik: Sure. First, I've changed GNU Mach to: + - Classify all pages from data_supply as external, and count them + in vm_page_external_count (previously, this variable was always zero) + +[[/open_issues/mach_vm_pageout]] + + - Count all pages for which a data_unlock has been requested as + potentially dirty pages + there is one important bit I forgot to mention in my recent + report: one "reliable" way to cause growing swap usage is simply + installing a lot of debian packages (e.g. 
running an apt-get upgrade) + some other kinds of I/O also seem to have such an effect, but I + wasn't able to pinpoint specific situations + - Establish a limit on how many potentially dirty pages are + allowed. If it's reached, a notification (right now it's just a bogus + m_o_data_unlock, to avoid implementing a new RPC) it's sent to the pager + which has generated the page fault + - Establish a hard limit on those dirt pages. If it's reached, + threads asking for a data_unlock are blocked until someone cleans some + pages. This should be improved with a forced pageout, if needed. + - And finally, in vm_pageout_scan, run over the inactive queue + searching for clean, external pages, freeing them. If it's not possible + to free enough pages, or if vm_page_external_count is less than 10% of + system's memory, the "normal" pageout is used. + I need to clean up things a little, but I want to send a preliminary + patch to bug-hurd ASAP, to have more people testing it. + antrik: Do you thing that performance degradation can be related + with the number of threads of your ext2fs translators? + slpz: hm... I didn't watch that recently; but in the past, I + observe that the thread count is pretty constant after it reaches + something like 14000 on heavy load... + err... wait, 14000 was ports :-) + I doubt my system would survive 14000 threads ;-) + don't remember thread count... I guess I should start watching + this again + antrik: I was thinking that 14000 threads sound like a lot :-) + what I know for sure, is that when operating with large files, the + deactivation of all pages of the memory object which is done after every + operation really hurts to performance + right now my root FS has 5100 ports and a mere 71 thread... but + then, it's almost freshly booted :-) + that's why I've just commented that operation in my code, since it's + not really needed anymore :-) + anyway, after submitting all my pending mails to bug-hurd, I'll try + to hunt that bug. 
Sounds funny. + regarding your explanation, I'm still trying to wrap my head + around some of the details. I must admit that I don't remember what + data_unlock does... or maybe I never fully understood it + the limit on dirty pages is global? + yes, right now it's global + I try to find the old discussion of the thread storm stuff + there was some concern about deadlocks + marcusb: yes, because we were talking about putting an static limit + for the server threads of a translators + marcusb: and that was wrong (my fault, I was even dumber back then + :-P) + oh boy digging in old mail is no fun. first I see mistakes in my + english. then I see quite complicated pager stuff I don't ever remember + touching. but there is a patch, and it has my name on it + I think I lost a couple of the early years of my hurd hacking :) + hm... I reread the chapter on locking, and it's still above me :-( + not sure what you are talking about, but if there are any + specific questions... + marcusb: external pager interface + +[[microkernel/mach/external_pager_mechanism]]. + + uuuuh ;) + memory_object_lock_request(), memory_object_lock_completed(), + memory_object_data_unlock() + is that from the mach manual? + yes + I didn't really understand that part when I first read it a couple + of years ago, and I still don't understand it now :-( + I am sure I didn't understand it either + and maybe I missed my window :) + let's see + hehe + slpz: what exactly do you mean by "the pager which has generated + the page fault"? 
+ marcusb: essentially I'm trying to understand the explanation of + the changes slpz did, but there are several bits totally obscure to me + :-( + antrik: when a I/O operation is requested to ext2fs, it maps the + object in question to it's own space, and then memcpy's from/to there + antrik: so the translator (which is also a pager) is the one who + generates the page fault + yeah + antrik: it's important to understand which messages are sent by + the kernel to the manager and which are sent the other way + if the dest port is memory_object_t, that indicates a msg from + kernel to manager. if it is memory_object_control_t, it's a msg from + manager to kernel + antrik: m_o_lock_request it's used by the pager to "settle" the + status of a memory object, m_o_lock_completed is the answer from the + kernel when the lock has been completed (only if the client has requested + to be notified), and m_o_data_unlock is a request from the kernel to + change the level of protection for a page (it's called from vm_fault.c) + slpz: but it's not pagers generating page faults, but users of + the memory object on the other side + marcusb: well, I think the direction is clear to me... but the + purpose not really :-) + ie a client that mapped a file + antrik: in ext2fs, all pages are initially provided to the kernel + (via data_supply) write protected. When a write operation is done over + one of those pages, a page fault it's generated, which sends a + m_o_data_unlock to the pager, which answers (if convenient) which a + page_lock decreasing the protection level + antrik: one use of lock_request is when you want to shut down + cleanly and want to get the dirty pages written back to you from the + kernel. + antrik: the other thing may be COW strategies + marcusb: well, pagers and clients are in the same task for most + translators, like ext2fs + slpz: oh. 
+ marcusb: but yes, a read operation in a mmap'ed file would trigger + the fault in a client user task + slpz: I think I forgot everything about pagers :) + marcusb: pager-memcpy.c is the key :-) + slpz: what becomes of the fault then? the kernel sees it's a + mapped memory object. will it then talk to the manager or to a pager? + slpz: the translator causes the faults itself when it handles + io_read()/io_write() requests I suppose, as opposed to clients accessing + mmap()ed objects which then generate the faults?... + ah, that's actually what you already said above :-) + marcusb: I'm not sure what do you mean by "manager"... + manager == memory object + mh + marcusb: for all external objects, it will ask to their current + pager + slpz: I think I am missing a couple of details, so nevermind. + It's starting to come back to me, but I am a bit afraid of that ;) + what I love about the Hurd is how damn readable the code is + considering it's an object system, it's so much nicer to read + than gtk stuff + when you get the big picture, it's actually somewhat fun to see how + data moves around just to fulfill a simple read() + you should make a diagram! + bonus point for animated video ;) + +[[hurd/IO_path]]. + + marcusb: heh, take a look at the hurd specific parts of glibc... I + cry in pain every time a do that... + slpz: oh yeah, rdwr-internal. + oh man + slpz: funny thing, I just looked at them the other day because of + the security issue + marcusb: I think there was one, maybe a slice from someone's + presentation... + I think I was always confused about the pager/memobj/kernel + interactions + marcusb: I'm barely able to read Roland's glibc code. I think it's + out of my reach. + marcusb: I think part of the problem is confusing terminology + it's good that you are instrumenting the mach kernel to see + what's actually going on in there. 
it was a black book for me, but neal + too a peek and got a much better understanding of the performance issues + than I ever did + when talking about "pager", we usually mean the process doing the + paging; but in mach terminology this actually seems to be the "manager", + while a "pager" is an individual object in the manager process... or + something like that ;-) + antrik: I just never took a look at the big picture. I look at + the parts + I knew the tail, ears, and legs of the elephant. + it's a lot of code for a beginner + I never understood the distinction between "pager" and "memory + object" though... + maybe "pager" refers to the object in the external pager, while + "memory object" is the part managed in Mach itself?... + memory object is a real object, to which you can send messages. + it's implemented in the server + hm... maybe it's the other way around then ;-) + there is also the default pager + I think the pager is just another name for the process that + serves the memory object (default pager == memory object for anonymous + memory == swap) + but! + there is also libpager + +[[hurd/libpager]] + + and that's a more complicated beast + actually, the correct term seems to be "default memory manager"... + yeah + from mach's pov + we always called it default pager in the Hurd + marcusb: problem is that "pager" is sometimes used in the Mach + documentation to refer to memory object ports IIRC + isn't it defpager executable? + could be + it's the same thing, really + indeed, the program implementing the default memory manager is + called "default pager"... so the terminology is really inconsistent + the hurd's pager library is a high level abstraction for mach's + external memory object interface. + i wouldn't worry about it too much + I never looked at libpager + you should! + it's an important beast + never seemed relevant to anything I did so far... 
+ though maybe it would help understanding + it's related to what you are looking now :) diff --git a/open_issues/rework_gnumach_ipc_spaces.mdwn b/open_issues/rework_gnumach_ipc_spaces.mdwn index b3d1b4a4..7c66776b 100644 --- a/open_issues/rework_gnumach_ipc_spaces.mdwn +++ b/open_issues/rework_gnumach_ipc_spaces.mdwn @@ -10,7 +10,7 @@ License|/fdl]]."]]"""]] [[!tag open_issue_gnumach]] -[[!toc] +[[!toc]] # IRC, freenode, #hurd, 2011-05-07 diff --git a/open_issues/translators_set_up_by_untrusted_users.mdwn b/open_issues/translators_set_up_by_untrusted_users.mdwn index 36fe5438..97f48bba 100644 --- a/open_issues/translators_set_up_by_untrusted_users.mdwn +++ b/open_issues/translators_set_up_by_untrusted_users.mdwn @@ -324,3 +324,24 @@ do bear some similarity with the issue we're discussing here. it should be one's normal right to change the view one has of it we discussed that once actually I believe... err... private namespaces I mean + +IRC, freenode, #hurd, 2011-09-10: + + I am rereading Neal Walfield's and Marcus Brinkman's critique of + the hurd on mach. One of the arguments is that a file system may be + malicious (by DoS its clients with infinitely deep directory + hierarchies). Is there an answer to that that does not require programs + to be programmed defensively against such possibilities? + +IRC, freenode, #hurd, 2011-09-14: + + cjuner: regarding malicious filesystems: the answer is to do + exactly the same as FUSE on Linux: don't follow translators set up by + untrusted users by default + antrik, but are legacy programs somehow protected? What about + executing `find`? Or is GNU's find somehow protected from that? + cjuner: I'm talking about a global policy + antrik, and who would implement that policy? + cjuner: either glibc or the parent translators + +Continued discussion about [[resource_management_problems/pagers]]. 
-- cgit v1.2.3 From ed0da874d7ef1d7d93084efbe76057e1f652b67e Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Tue, 4 Oct 2011 11:11:00 +0200 Subject: shortcuts: [[!message-id]]. --- open_issues/fakeroot-tcp_vs_eintr.mdwn | 2 +- open_issues/gcc.mdwn | 8 ++++---- open_issues/glibc.mdwn | 2 +- open_issues/ifunc.mdwn | 6 +++--- open_issues/notmuch_n_gmane.mdwn | 18 ++++++++++++++++++ open_issues/performance/degradation.mdwn | 2 +- open_issues/translate_fd_or_port_to_file_name.mdwn | 3 ++- shortcuts.mdwn | 5 +++++ 8 files changed, 35 insertions(+), 11 deletions(-) create mode 100644 open_issues/notmuch_n_gmane.mdwn (limited to 'open_issues/performance') diff --git a/open_issues/fakeroot-tcp_vs_eintr.mdwn b/open_issues/fakeroot-tcp_vs_eintr.mdwn index 00b57059..36707cd2 100644 --- a/open_issues/fakeroot-tcp_vs_eintr.mdwn +++ b/open_issues/fakeroot-tcp_vs_eintr.mdwn @@ -13,7 +13,7 @@ License|/fdl]]."]]"""]] # [[!debbug 641200]] -`id:"87litvz9me.fsf@kepler.schwinge.homeip.net"` +[[!message-id "87litvz9me.fsf@kepler.schwinge.homeip.net"]] [...] if test -z "$*"; then diff --git a/open_issues/gcc.mdwn b/open_issues/gcc.mdwn index 04c111fd..a3b3a2d2 100644 --- a/open_issues/gcc.mdwn +++ b/open_issues/gcc.mdwn @@ -182,10 +182,10 @@ Last reviewed up to the [[Git mirror's 3d83581faf4eaf52c1cf52cc0d11cc7dd1264275 make[1]: Leaving directory `/media/boole-data/thomas/tmp/gnu-0/src/gcc.obj/i686-pc-gnu/libgcc' make: *** [all-target-libgcc] Error 2 - See threads at - *id:"AANLkTinY1Cd4_qO_9euYJN8zev4hdr7_ANpjNG+yGRMn@mail.gmail.com"*, - *id:"20110328225532.GE5293@synopsys.com"*, - *id:"4D52D522.1040804@gmail.com"*. Can simply configure the first GCC with + See threads at [[!message-id + "AANLkTinY1Cd4_qO_9euYJN8zev4hdr7_ANpjNG+yGRMn@mail.gmail.com"]], + [[!message-id "20110328225532.GE5293@synopsys.com"]], [[!message-id + "4D52D522.1040804@gmail.com"]]. Can simply configure the first GCC with `--disable-decimal-float`. 
Alternatively, can we use `#ifndef inhibit_libc` for this (these?) file(s)? diff --git a/open_issues/glibc.mdwn b/open_issues/glibc.mdwn index 0130aceb..45ce3b1d 100644 --- a/open_issues/glibc.mdwn +++ b/open_issues/glibc.mdwn @@ -192,7 +192,7 @@ Last reviewed up to the [[Git mirror's 16292eddd77f66002e2104848e75a0fb4d316692 make[1]: Leaving directory `/media/erich/home/thomas/tmp/glibc/tschwinge/Roger_Whittaker' make: *** [all] Error 2 - See `id:"871uv99c59.fsf@kepler.schwinge.homeip.net"`. Passing + See [[!message-id "871uv99c59.fsf@kepler.schwinge.homeip.net"]]. Passing `install_root=/INVALID` to `make`/`make check` is a cheap cure. * Verify baseline changes, if we need any follow-up changes: diff --git a/open_issues/ifunc.mdwn b/open_issues/ifunc.mdwn index 96928fdc..c357c99c 100644 --- a/open_issues/ifunc.mdwn +++ b/open_issues/ifunc.mdwn @@ -44,6 +44,6 @@ use it from GCC. * [[GCC]] - In `gcc/config.gcc`, set `default_gnu_indirect_function=yes' for us, like - done for GNU/Linux. See thread starting at - *id:"CAFULd4YZsAQ6ckFjXtU5-yyv=3tYQwTJOPhU9zmJxFOrnotj8g@mail.gmail.com"*. + In `gcc/config.gcc`, set `default_gnu_indirect_function=yes` for us, like + done for GNU/Linux. See thread starting at [[!message-id + "CAFULd4YZsAQ6ckFjXtU5-yyv=3tYQwTJOPhU9zmJxFOrnotj8g@mail.gmail.com"]]. diff --git a/open_issues/notmuch_n_gmane.mdwn b/open_issues/notmuch_n_gmane.mdwn new file mode 100644 index 00000000..664c9876 --- /dev/null +++ b/open_issues/notmuch_n_gmane.mdwn @@ -0,0 +1,18 @@ +[[!meta copyright="Copyright © 2011 Free Software Foundation, Inc."]] + +[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable +id="license" text="Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with no Invariant +Sections, no Front-Cover Texts, and no Back-Cover Texts. 
A copy of the license +is included in the section entitled [[GNU Free Documentation +License|/fdl]]."]]"""]] + +[[!meta title="Notmuch'n'Gmane"]] + +[[!taglink open_issue_documentation]]; [[ikiwiki]] issue. + +In `\[[!message-id +"AANLkTinY1Cd4_qO_9euYJN8zev4hdr7_ANpjNG+yGRMn@mail.gmail.com"]]`, underscores +are replaced with spaces in the rendered output: [[!message-id +"AANLkTinY1Cd4_qO_9euYJN8zev4hdr7_ANpjNG+yGRMn@mail.gmail.com"]]. diff --git a/open_issues/performance/degradation.mdwn b/open_issues/performance/degradation.mdwn index 8c9a087c..64542af3 100644 --- a/open_issues/performance/degradation.mdwn +++ b/open_issues/performance/degradation.mdwn @@ -15,7 +15,7 @@ License|/fdl]]."]]"""]] [[!toc]] -# Email, `id:"87mxg2ahh8.fsf@kepler.schwinge.homeip.net"` (bug-hurd, 2011-07-25, Thomas Schwinge) +# Email, [[!message-id "87mxg2ahh8.fsf@kepler.schwinge.homeip.net"]] (bug-hurd, 2011-07-25, Thomas Schwinge) > Building a certain GCC configuration on a freshly booted system: 11 h. > Remove build tree, build it again (2nd): 12 h 50 min. Huh. Remove build diff --git a/open_issues/translate_fd_or_port_to_file_name.mdwn b/open_issues/translate_fd_or_port_to_file_name.mdwn index 485fb985..bd9abcf9 100644 --- a/open_issues/translate_fd_or_port_to_file_name.mdwn +++ b/open_issues/translate_fd_or_port_to_file_name.mdwn @@ -83,4 +83,5 @@ A related issue: http://lists.gnu.org/archive/html/bug-bash/2011-04/msg00097.html i see -Also see email thread starting at `id:"20110714082216.GA8335@sceen.net"`. +Also see email thread starting at [[!message-id +"20110714082216.GA8335@sceen.net"]]. diff --git a/shortcuts.mdwn b/shortcuts.mdwn index 563bbe08..b62b2981 100644 --- a/shortcuts.mdwn +++ b/shortcuts.mdwn @@ -85,3 +85,8 @@ ikiwiki will include your shortcut in the standard underlay. 
* [[!shortcut name=GNU_Savannah_task url="http://savannah.gnu.org/task/?%s" desc="GNU Savannah task #%s"]] * [[!shortcut name=FF_project url="http://www.fossfactory.org/project/p%s" desc="FOSS Factory bounty (p%s)"]] + + +## Notmuch'n'Gmane. + + * [[!shortcut name=message-id url="http://thread.gmane.org/%s" desc="""`id:"%s"`"""]] -- cgit v1.2.3