summaryrefslogtreecommitdiff
path: root/doc/hurd.texi
blob: 35414410ee6205207997cc5b353bfd3f8565e833 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
\input texinfo  @c -*-texinfo-*-
@setfilename hurd.info

@c FIXME: might it be useful to have a glossary?
@c   tb: yes, indeed.  very much so.  If you provide a list of words,
@c   tb: then I'll write definitions.

@c FIXME: Please use the active voice whenever possible.  There are
@c a lot of sentences here that use passive voice, and it's therefore
@c hard for me to tell who is doing what to whom.  I have not changed
@c those sentences because I did not know what to make the subject and
@c what the object.
@c   tb: can you mark the unclear sentences so I can fix them up?

@c FIXME: Be consistent with mood -- that is, when writing the
@c descriptions of functions, either write them all as declaratives (as
@c in "This function twiddles the frobs.") or as imperatives (as in
@c "Twiddle the frobs.").  As this now stands, some function definitions
@c are imperative (such as ports_reallocate_port: `Destroy the receive right'), and some
@c are declarative (such as ihash_set_cleanup: `Sets @var{ht}'s element
@c cleanup function').  This inconsistency is confusing.  A particularly
@c confusing example is the description of `pager_unlock_page': it reads
@c `A page should be made writable.'
@c   tb: This is, I think, a consequence of many of the sections having
@c   tb: been written by cut-and-paste from the header files (which is a
@c   tb: decent way to start, but the results do need patching up).

@c FIXME: Might we want to use some sort of highlighting when we
@c refer to libraries by an abbreviated version of their name?  For
@c example, we often refer to `fshelp', by
@c which we mean the library `libfshelp.a'.  On those few occasions when
@c we bother to spell out its full name, we use `@code', as we should;
@c but when we abbreviate the name to `fshelp', we use no highlighting
@c at all.  These un-highlighted abbreviated names look odd to me.
@c   tb: Yes, perhaps so.  We should consult a Texinfo god for advice.

@c FIXME: I think we should say `zero' instead of `NULL' or `NUL'.
@c Currently, this document uses all three, which is confusing.
@c   tb: I see no uses of "NULL".  "null" is used, as an adjective,
@c   tb: which is synonymous with "zero" for pointers, but has different
@c   tb: connotations.  "NUL" is an ASCII character, and is explicitly
@c   tb: used only as such.


@c FIXME: This document sometimes says something MUST be
@c such-and-such, and other times says something SHOULD be
@c such-and-such.  It's not clear if you're using `must' and `should'
@c interchangeably, or if instead the really mean different things.
@c (Similarly for `may', `do', and `does'.)  Also, when we say something
@c MUST be such-and-such, I for one always wonder `what happens if it
@c isn't'?  For example, the description of `diskfs_create_protid' says
@c `The node @code{@var{po}->np} must be locked.'  I wonder `what
@c happens if the node isn't locked?'  I imagine other programmers will
@c wonder that too; in that case, perhaps the description should say
@c `The node @code{@var{po}->np} must be locked; otherwise the function
@c returns ENAUGHTY'.  A particularly confusing example is the paragraph
@c in subsection I/O Object Ports, which begins `The uid and gid sets
@c associated with a port may not be visibly shared  with other ports,
@c nor may they ever change.  The server must fix the identification of
@c a set of uids and gids with a particular port at the moment of the
@c port's creation.'
@c   tb: If the node is not locked on entry to diskfs_create_protid,
@c   tb: then the user (the program linking against libdiskfs) is
@c   tb: violating the interface, and the results are Undefined.  The
@c   tb: resulting filesystem will experience difficult-to-trace and
@c   tb: apparently random crashes and data corruption.
@c   tb: We don't WANT such functions to have to check for and return
@c   tb: error codes any more than we want scanf to try and diagnose
@c   tb: stray pointers.  But this does not mean that all things are as
@c   tb: clear as they should be, either.  "must" means "if you don't do
@c   tb: this, then you are violating the interface".  "should" often
@c   tb: means the same same thing, sometimes it's looser.  The real
@c   tb: issue here is that we should define exactly what the
@c   tb: consequences of violating an interface are.  In the case of
@c   tb: library interfaces, it means that the resulting program's
@c   tb: behavior is undefined.  In the case of server interfaces, it
@c   tb: means that one has Broken The Rules and that other programs
@c   tb: will behave in correspondingly bad ways.  In any case, some
@c   tb: careful auditing and editing of this kind of thing needs to
@c   tb: happen, but not until we have written more actual text.



@c Get the Hurd version we are documenting.
@include version.texi

@c Unify all our little indices for now.
@defcodeindex sc
@syncodeindex sc cp
@syncodeindex fn cp
@syncodeindex vr cp
@syncodeindex tp cp
@syncodeindex pg cp

@dircategory Kernel
@direntry
* Hurd: (hurd).  Using and programming the Hurd kernel servers.
@end direntry

@ifinfo
Copyright @copyright{} 1994-2002  Free Software Foundation, Inc.

Permission is granted to make and distribute verbatim copies of
this manual provided the copyright notice and this permission notice
are preserved on all copies.

@ignore
Permission is granted to process this file through TeX and print the
results, provided the printed document carries a copying permission
notice identical to this one except for the removal of this paragraph
(this paragraph not being relevant to the printed manual).

@end ignore

Permission is granted to copy and distribute modified versions of this
manual under the conditions for verbatim copying, provided also that
the entire resulting derived work is distributed under the terms of a
permission notice identical to this one.

Permission is granted to copy and distribute translations of this manual
into another language, under the above conditions for modified versions.
@end ifinfo

@setchapternewpage none
@settitle Hurd Reference Manual
@titlepage
@finalout
@title The GNU Hurd Reference Manual
@author Thomas Bushnell, BSG
@author Gordon Matzigkeit
@page

@vskip 0pt plus 1filll
Copyright @copyright{} 1994--2002 Free Software Foundation, Inc.

Permission is granted to make and distribute verbatim copies of
this manual provided the copyright notice and this permission notice
are preserved on all copies.

Permission is granted to copy and distribute modified versions of this
manual under the conditions for verbatim copying, provided also that
the entire resulting derived work is distributed under the terms of a
permission notice identical to this one.

Permission is granted to copy and distribute translations of this manual
into another language, under the above conditions for modified versions.
@end titlepage

@ifinfo

@node Top
@top The GNU Hurd

This file documents the GNU Hurd kernel component.  This edition of the
documentation was last updated for version @value{VERSION} of the Hurd.

@menu
* Introduction::                How to use this manual.
* Bootstrap::                   Turning a computer into a Hurd machine.
* Foundations::                 Basic features used throughout the Hurd.
* Input and Output::            Reading and writing I/O channels.
* Files::                       Regular file and directory nodes.
* Special Files::               Files with unusual Unix-compatible semantics.
* Stores::                      Generalized units of storage.
* Stored Filesystems::          Filesystems for physical media.
* Twisted Filesystems::         Providing new hierarchies for existing data.
* Distributed Filesystems::     Sharing files between separate machines.
* Networking::                  Interconnecting with other machines.
* Terminal Handling::           Helping people interact with the Hurd.
* Running Programs::            Program execution and process management.
* Authentication::              Verifying user and server privileges.
* Index::                       Guide to concepts, functions, and files.

@detailmenu
 --- The Detailed Node Listing ---

Introduction

* Audience::                    The people for whom this manual is written.
* Features::                    Reasons to install and use the Hurd.
* Overview::                    Basic architecture of the Hurd.
* History::                     How the Hurd was born.
* Copying::                     The Hurd is free software.

Bootstrap

* Bootloader::                  Starting the microkernel, or other OSes.
* Server Bootstrap::            Waking up the Hurd.
* Shutdown::                    Letting the Hurd get some rest.

Server Bootstrap

* Invoking serverboot::         Starting a set of interdependent servers.
* Boot Scripts::                Describing server bootstrap relationships.
* Recursive Bootstrap::         Running a Hurd under another Hurd.
* Invoking boot::               How to use the boot program.

Foundations

* Threads Library::             Every Hurd server and library is multithreaded.
* Ports Library::               Managing server port receive rights.
* Integer Hash Library::        Integer-keyed hash tables.
* Misc Library::                Things that soon will be in the GNU C library.
* Bug Address Library::         Where to report Hurd bugs.

Ports Library

* Buckets and Classes::         Basic units of port organization.
* Port Rights::                 Moving port rights to and from @code{libports}.
* Port Metadata::               Managing port-related information.
* Port References::             Guarding against leaks and lossage.
* RPC Management::              Locking and interrupting RPC operations.

Input and Output

* Iohelp Library::              I/O authentication and lock management.
* Pager Library::               Implementing multithreaded external pagers.
* I/O Interface::               RPC-based input/output channels.

Iohelp Library

* I/O Users::                   User authentication management.
* Conch Management::            Deprecated shared I/O implementation.

Pager Library

* Pager Management::            High-level interface to external pagers.
* Pager Callbacks::             Functions that the user must define.

I/O Interface

* I/O Object Ports::            How ports to I/O objects work.
* Simple Operations::           Read, write, and seek.
* Open Modes::                  State bits that affect pieces of operation.
* Asynchronous I/O::            How to be notified when I/O is possible.
* Information Queries::         How to implement @code{io_stat} and
                                  @code{io_server_version}.
* Mapped Data::                 Getting memory objects referring to the
                                  data of an I/O object.

Files

* Translators::                 Extending the Hurd filesystem hierarchy.
* Trivfs Library::              Implementing single-file translators.
* Fshelp Library::              Miscellaneous generic filesystem routines.
* File Interface::              File ports implement the file interface.
* Filesystem Interface::        Translator control interface.

Translators

* Invoking settrans::           Declaring how a node should be translated.
* Invoking showtrans::          Displaying how nodes are translated.
* Invoking mount::              Unix-compatible active filesystem translators.
* Invoking fsysopts::           Modifying translation parameters at runtime.

Trivfs Library

* Trivfs Startup::              Writing a simple trivfs-based translator.
* Trivfs Callbacks::            Mandatory user-defined trivfs functions.
* Trivfs Options::              Optional user-defined trivfs functions.
* Trivfs Ports::                Managing control and protid ports.

Fshelp Library

* Passive Translator Linkage::  Invoking passive translators.
* Active Translator Linkage::   Managing active translators.
* Fshelp Locking::              Implementing file locking.
* Fshelp Permissions::          Standard file access permission policies.
* Fshelp Misc::                 Useful standalone routines.

File Interface

* File Overview::               Basic concepts for the file interface.
* Changing Status::             Changing the owner (etc.) of a file.
* Program Execution::           Executing files.
* File Locking::                Implementing the @code{flock} call.
* File Frobbing::               Other active calls on files.
* Opening Files::               Looking up files in directories.
* Modifying Directories::       Creating and deleting nodes.
* Notifications::               File and directory change callbacks.
* File Translators::            How to set and get translators.

Stores

* Store Library::               An abstract interface to storage systems.

Store Library

* Store Arguments::             Parsing store command-line arguments.
* Store Management::            Creating and manipulating stores.
* Store I/O::                   Reading and writing data to stores.
* Store Classes::               Ready-to-use storage backends.
* Store RPC Encoding::          Transferring store descriptors via RPC.

Stored Filesystems

* Repairing Filesystems::       Recovering from minor filesystem crashes.
* Linux Extended 2 FS::         The popular Linux filesystem format.
* BSD Unix FS::                 The BSD Unix 4.x Fast File System.
* ISO-9660 CD-ROM FS::          Standard CD-ROM format.
* Diskfs Library::              Implementing new filesystem servers.

Diskfs Library

* Diskfs Startup::              Initializing stored filesystems.
* Diskfs Arguments::            Parsing command-line arguments.
* Diskfs Globals::              Global behaviour modification.
* Diskfs Node Management::      Allocation, reference counting, I/O,
                                  caching, and other disk node routines.
* Diskfs Callbacks::            Mandatory user-defined diskfs functions.
* Diskfs Options::              Optional user-defined diskfs functions.
* Diskfs Internals::            Reimplementing small pieces of diskfs.

Distributed Filesystems

* File Transfer Protocol::      A distributed filesystem based on FTP.
* Network File System::         Sun's NFS: a lousy, but common filesystem.

File Transfer Protocol

* FTP Connection Library::      Managing remote FTP server connections.

Networking

* Socket Interface::            Network communication I/O protocol.

Authentication

* Auth Interface::              Auth ports implement the auth interface.

Auth Interface

* Auth Protocol::               Bidirectional authentication.

@end detailmenu
@end menu

@end ifinfo


@node Introduction
@chapter Introduction

The GNU Hurd@footnote{The name @dfn{Hurd} stands for ``Hird of
Unix-Replacing Daemons.''  The name @dfn{Hird} stands for ``Hurd of
Interfaces Representing Depth.''} is the GNU Project's replacement for
the Unix kernel. The Hurd is a collection of servers that run on the
Mach microkernel to implement file systems, network protocols, file
access control, and other features that are normally implemented by the
Unix kernel or similar kernels (such as Linux).

@c FIXME:  Might we want to define `server' and `Mach' in a
@c glossary, and refer to those definitions here?

@menu
* Audience::                    The people for whom this manual is written.
* Features::                    Reasons to install and use the Hurd.
* Overview::                    Basic architecture of the Hurd.
* History::                     How the Hurd was born.
* Copying::                     The Hurd is free software.
@end menu


@node Audience
@section Audience

This manual is designed to be useful to everybody who is interested in
using, administering, or programming the Hurd.

If you are an end-user and you are looking for help on running the Hurd,
the first few chapters of this manual describe the essential parts of
installing, starting up, and shutting down a Hurd workstation.  If you
need help with a specific program, the best way to use this manual is to
find the program's name in the index and go directly to the appropriate
section.  You may also wish to try running @kbd{@var{program} --help},
which will display a brief usage message for @var{program}
(@pxref{Foundations}).

The rest of this manual is a technical discussion of the Hurd servers
and their implementation, and would not be helpful until you want to
learn how to modify the Hurd.

This manual is organized according to the subsystems of the Hurd, and
each chapter begins with descriptions of utilities and servers that are
related to that subsystem.  If you are a system administrator, and you
want to learn more about, say, the Hurd networking subsystem, you can
skip to the networking chapter (@pxref{Networking}), and read about the
related utilities and servers.

Programmers who are interested in learning how to modify Hurd servers, or
write new ones, should begin by learning about a microkernel to which the
Hurd has been ported (currently only GNU Mach) and reading
@ref{Foundations}.  You should then familiarize yourself with a
subsystem that interests you by reading about existing servers and the
libraries they use.  At that point, you should be able to study the
source code of existing Hurd servers and understand how they use the
Hurd libraries.

The final level of mastery is learning the about the RPC interfaces
which the Hurd libraries implement.  The last section of each chapter
describes any Hurd interfaces used in that subsystem.  Those sections
assume that you are perusing the referenced interface definitions as you
read.  After you have understood a given interface, you will be in a
good position to improve the Hurd libraries, design your own interfaces,
and implement new subsystems.


@node Features
@section Features

The Hurd is not the most advanced operating system known to the planet
(yet), but it does have a number of enticing features:

@table @asis
@item it's free software
Anybody can use, modify, and redistribute it under the terms of the GNU
General Public License (@pxref{Copying}).  The Hurd is part of the GNU
system, which is a complete operating system licensed under the GPL.

@item it's compatible
The Hurd provides a familiar programming and user environment.  For all
intents and purposes, the Hurd is a modern Unix-like kernel.  The Hurd
uses the GNU C Library, whose development closely tracks standards such
as ANSI/ISO, BSD, POSIX, Single Unix, SVID, and X/Open.

@item it's built to survive
Unlike other popular kernel software, the Hurd has an object-oriented
structure that allows it to evolve without compromising its design.
This structure will help the Hurd undergo major redesign and
modifications without having to be entirely rewritten.

@item it's scalable
The Hurd implementation is aggressively multithreaded so that it runs
efficiently on both single processors and symmetric multiprocessors.
The Hurd interfaces are designed to allow transparent network clusters
(@dfn{collectives}), although this feature has not yet been implemented.

@item it's extensible
The Hurd is an attractive platform for learning how to become a kernel
hacker or for implementing new ideas in kernel technology.  Every part
of the system is designed to be modified and extended.

@item it's stable
It is possible to develop and test new Hurd kernel components without
rebooting the machine (not even accidentally).  Running your own kernel
components doesn't interfere with other users, and so no special system
privileges are required.  The mechanism for kernel extensions is secure
by design: it is impossible to impose your changes upon other users
unless they authorize them or you are the system administrator.

@item it exists
The Hurd is real software that works Right Now.  It is not a research
project or a proposal.  You don't have to wait at all before you can
start using and developing it.
@end table


@node Overview
@section Overview

An operating system kernel provides a framework for programs to share a
computer's hardware resources securely and efficiently.  This framework
includes mechanisms for programs to communicate safely, even if they do
not trust one another (@pxref{Ports Library}).

The GNU Hurd divides up the work of the traditional kernel, and
implements it in separate programs, or @dfn{kernel servers}.  The Hurd
formally defines the communication protocols that each of the servers
understands, so that it is possible for different servers to implement
the same interface.

The GNU C Library provides a POSIX environment on the Hurd, by
translating standard POSIX system calls into interactions with the
appropriate Hurd server.


@node History
@section History

Richard Stallman (RMS) started GNU in 1983, as a project to create a
complete free operating system.  In the text of the GNU Manifesto, he
mentioned that there is a primitive kernel.  In the first GNUsletter,
Feb. 1986, he says that GNU's kernel is TRIX, which was developed at the
Massachusetts Institute of Technology.

By December of 1986, the Free Software Foundation (FSF) had ``started
working on the changes needed to TRIX'' [Gnusletter, Jan. 1987].
Shortly thereafter, the FSF began ``negotiating with Professor Rashid of
Carnegie-Mellon University about working with them on the development of
the Mach kernel'' [Gnusletter, June, 1987].  The text implies that the
FSF wanted to use someone else's work, rather than have to fix TRIX.

In [Gnusletter, Feb. 1988], RMS was talking about taking Mach and
putting the Berkeley Sprite filesystem on top of it, ``after the parts
of Berkeley Unix@dots{} have been replaced.''

Six months later, the FSF is saying that ``if we can't get Mach, we'll
use TRIX or Berkeley's Sprite.''  Here, they present Sprite as a
full-kernel option, rather than just a filesystem.

In January, 1990, they say ``we aren't doing any kernel work.  It does
not make sense for us to start a kernel project now, when we still hope
to use Mach'' [Gnusletter, Jan. 1990].  Nothing significant occurs until
1991, when a more detailed plan is announced:

@display
``We are still interested in a multi-process kernel running on top of
Mach. The CMU lawyers are currently deciding if they can release Mach
with distribution conditions that will enable us to distribute it. If
they decide to do so, then we will probably start work. CMU has
available under the same terms as Mach a single-server partial Unix
emulator named Poe; it is rather slow and provides minimal
functionality. We would probably begin by extending Poe to provide full
functionality. Later we hope to have a modular emulator divided into
multiple processes.'' [Gnusletter, Jan. 1991].
@end display

RMS explains the relationship between the Hurd and Linux in
@uref{http://www.gnu.org/software/hurd/hurd-and-linux.html}, where he
mentions that the FSF started developing the Hurd in 1990.  As of
[Gnusletter, Nov. 1991], the Hurd (running on Mach) is GNU's official
kernel.


@node Copying
@section GNU General Public License

@include gpl.texinfo


@node Bootstrap
@chapter Bootstrap

Bootstrapping@footnote{The term @dfn{bootstrapping} refers to a Dutch
legend about a boy who was able to fly by pulling himself up by his
bootstraps.  In computers, this term refers to any process where a
simple system activates a more complicated system.} is the procedure by
which your machine loads the microkernel and transfers control to the
Hurd servers.


@menu
* Bootloader::                  Starting the microkernel, or other OSes.
* Server Bootstrap::            Waking up the Hurd.
* Shutdown::                    Letting the Hurd get some rest.
@end menu

@node Bootloader
@section Bootloader

The @dfn{bootloader} is the first software that runs on your machine.
Many hardware architectures have a very simple startup routine which
reads a very simple bootloader from the beginning of the internal hard
disk, then transfers control to it.  Other architectures have startup
routines which are able to understand more of the contents of the hard
disk, and directly start a more advanced bootloader.

@cindex GRUB
@cindex GRand Unified Bootloader
Currently, @dfn{GRUB}@footnote{The GRand Unified Bootloader, available
from @uref{http://www.gnu.org/software/grub/}.} is the GNU bootloader.
GNU GRUB provides advanced functionality, and is capable of loading several
different kernels (such as Linux, DOS, and the *BSD family).

From the standpoint of the Hurd, the bootloader is just a mechanism to
get the microkernel running and transfer control to the Hurd servers.
You will need to refer to your bootloader and microkernel documentation
for more information about the details of this process.


@node Server Bootstrap
@section Server Bootstrap
@pindex serverboot

The @code{serverboot} program has been deprecated.  Newer kernels
support processing the bootscript parameters and boot the Hurd
directly.

The @code{serverboot} program is responsible for loading and executing
the rest of the Hurd servers.  Rather than containing specific
instructions for starting the Hurd, it follows general steps given in a
user-supplied boot script.

To boot the Hurd using @code{serverboot}, the microkernel must start
@code{serverboot} as its
first task, and pass it appropriate arguments.  @code{serverboot} has a
counterpart, called @code{boot}, which can be invoked while the Hurd is
already running, and allows users to start their own complete sub-Hurds
(@pxref{Recursive Bootstrap}).

@menu
* Invoking serverboot::         Starting a set of interdependent servers.
* Boot Scripts::                Describing server bootstrap relationships.
* Recursive Bootstrap::         Running a Hurd under another Hurd.
* Invoking boot::               How to use the boot program.
@end menu


@node Invoking serverboot
@subsection Invoking @code{serverboot}

The @code{serverboot} program has the following synopsis:

@example
serverboot -@var{switch}... [[@var{host-port} @var{device-port}] @var{root-name}]
@end example

@c FIXME: serverboot should accept --help and --version, for consistency
Each @var{switch} is a single character, out of the following set:

@table @samp
@item a
Prompt the user for the @var{root-name}, even if it was already supplied
on the command line.

@item d
Prompt the user to strike a key after the boot script has been read.

@item q
Prompt the user for the name of the boot script.  By default, use
@file{@var{root-name}:/boot/servers.boot}.
@end table

All the @var{switches} are put into the @code{$@{boot-args@}} script
variable.

@var{host-port} and @var{device-port} are integers which represent the
microkernel host and device ports, respectively (and are used to
initialize the @code{$@{host-port@}} and @code{$@{device-port@}} boot
script variables).  If these ports are not specified, then
@code{serverboot} assumes that the Hurd is already running, and fetches
the current ports from the procserver (FIXME xref).

@var{root-name} is the name of the microkernel device that should be
used as the Hurd bootstrap filesystem.  @code{serverboot} uses this name
to locate the boot script (described above), and to initialize the
@code{$@{root-device@}} script variable.


@node Boot Scripts
@subsection Boot Scripts
@pindex /boot/servers.boot
@pindex servers.boot

Boot Scripts are used to boot further Hurd systems in parallel to the
first, and are parsed by @code{serverboot} to boot the Hurd.  See
@file{/boot/servers.boot} for an example of a Hurd boot script.

FIXME: finish


@node Recursive Bootstrap
@subsection Recursive Bootstrap

The @code{boot} program can be used to start a set of core Hurd servers
while another Hurd is already running.  You will rarely need to do this,
and it requires superuser privileges to control the new Hurd (or allow
it to access certain devices), but it is interesting to note that it can
be done.

Usually, you would make changes to only one server, and simply tell your
programs to use it in order to test out your changes.  This process can
be applied even to the core servers.  However, some changes have
far-reaching effects, and so it is nice to be able to test those effects
without having to reboot the machine.

@c FIXME: `boot' synopsis

Here are the steps you can follow to test out a new set of servers:

@enumerate 1
@item
Create a pseudo-root device.  Usually, you would do this by creating a
new partition under your old Hurd, and initializing it with your
favorite filesystem format.  @code{boot} understands the regular
@code{libstore} options (FIXME xref), so you may use a file or other
store instead of a partition.

@example
$ @kbd{dd if=/dev/zero of=my-partition bs=1024k count=400}
400+0 records in
400+0 records out
$ @kbd{mke2fs ./my-partition}
mke2fs 1.18, 11-Nov-1999 for EXT2 FS 0.5b, 95/08/09
my-partition is not a block special device.
Proceed anyway? (y,n) @kbd{y}
Filesystem label=
OS type: GNU/Hurd
Block size=1024 (log=0)
Fragment size=1024 (log=0)
102400 inodes, 409600 blocks
20480 blocks (5.00%) reserved for the super user
First data block=1
50 block groups
8192 blocks per group, 8192 fragments per group
2048 inodes per group
Superblock backups stored on blocks:
        8193, 24577, 40961, 57345, 73729, 204801, 221185, 401409

Writing inode tables: done
Writing superblocks and filesystem accounting information: done
$
@end example

@item
Copy the core servers, C library, your modified programs, and anything
else you need onto the pseudo-root.

@example
$ @kbd{settrans -c ./my-root /hurd/ext2fs -r `pwd`/my-partition}
$ @kbd{fsysopts ./my-root --writable}
$ @kbd{cd my-root}
$ @kbd{tar -zxpf /pub/debian/FIXME/gnu-20000929.tar.gz}
$ @kbd{cd ..}
$ @kbd{fsysopts ./my-root --readonly}
$
@end example

@item
Create a new boot script (FIXME xref).

@item
Run @code{boot}.

@example
$ @kbd{boot -D ./my-boot ./my-boot/boot/servers.boot ./my-partition}
[...]
@end example

@item
Here is an example using a hard drive that already has a GNU/Hurd
system installed on an ext2 filesystem on @file{/dev/hd2s1}.

@example
$ @kbd{settrans /mnt /hurd/ex2fs --readonly /dev/hd2s1}
$ @kbd{boot -d -D /mnt -I /mnt/boot/servers.boot /dev/hd2s1}
@end example

@item
See @pxref{Invoking boot} for help with boot.
@end enumerate

Note that it is impossible to share microkernel devices between the two
running Hurds, so don't get any funny ideas.  When you're finished
testing your new Hurd, then you can run the @code{halt} or @code{reboot}
programs to return control to the parent Hurd.

@c FIXME: the `don't get any funny ideas' comment is confusing.  Am
@c I genuinely in some sort of danger if I contemplate sharing
@c microkernel devices between two running Hurds?
@c  tb: not if you know what you are doing.  But there is no clever
@c  device mediation going on.  Two hurds, with two filesystems writing
@c  the same partition, will wreak havoc.  Two hurds reading from the
@c  same terminal device will not share nicely.

If you're satisfied with your new Hurd, you can arrange for your
bootloader to start it, and reboot your machine.  Then, you'll be in a
safe place to overwrite your old Hurd with the new one, and reboot back
to your old configuration (with the new Hurd servers).


@node Invoking boot
@subsection Invoking boot

Usage: boot [@var{option}@dots{}] @var{boot-script} @var{device}@dots{}

@table @code
@item --kernel-command-line=@var{command line}
@itemx -c
Simulated multiboot command line to supply.

@item --pause
@itemx -d
Pause for user confirmation at various times during booting.

@item --boot-root=@var{dir}
@itemx -D
Root of a directory tree in which to find the files specified in
@var{boot-script}.

@item --interleave=@var{blocks}
Interleave in runs of length @var{blocks}.

@item --isig
@itemx -I
Do not disable terminal signals, so you can suspend and interrupt the
boot program itself, rather than the programs running in the booted
system.

@item --layer
@itemx -L
Layer multiple devices for redundancy.

@item --single-user
@itemx -s
Boot into single user mode.

@item --store-type=@var{type}
@itemx -T
Each @var{device} names a store of type @var{type}.
@end table

Mandatory or optional arguments to long options are also mandatory or optional
for any corresponding short options.

If neither @option{--interleave} or @option{--layer} is specified, multiple
@var{device}s are concatenated.


@node Shutdown
@section Shutdown
@scindex halt
@scindex reboot

FIXME: finish


@node Foundations
@chapter Foundations

Every Hurd program accepts the following optional arguments:

@table @samp
@item --help
Display a brief usage message, then exit.  This message is not a
substitute for reading program documentation; rather, it provides useful
reminders about specific command-line options that a program
understands.

@item --version
Output program version information and exit.
@end table

The rest of this chapter provides a programmer's introduction to the
Hurd.  If you are not a programmer, then this chapter will not make much
sense to you@dots{} you should consider skipping to descriptions of
specific Hurd programs (@pxref{Audience}).

The Hurd distribution includes many libraries in order to provide a
useful set of tools for writing Hurd utilities and servers.  Several of
these libraries are useful not only for the Hurd, but also for writing
microkernel-based programs in general.  These fundamental libraries are
not difficult to understand, and they are a good starting point, because
the rest of the Hurd relies upon them quite heavily.

@menu
* Threads Library::             Every Hurd server and library is multithreaded.
* Ports Library::               Managing server port receive rights.
* Integer Hash Library::        Integer-keyed hash tables.
* Misc Library::                Things that soon will be in the GNU C library.
* Bug Address Library::         Where to report Hurd bugs.
@end menu

@node Threads Library
@section Threads Library
@scindex libthreads
@scindex cthreads.h

All Hurd servers and libraries are aggressively multithreaded in order
to take full advantage of any multiprocessing capabilities provided by
the microkernel and the underlying hardware.  The Hurd threads library,
@code{libthreads}, contains the default Hurd thread implementation, which
is declared in @code{<cthreads.h>}.

Currently (April 1998), the Hurd uses cthreads, which have already been
documented thoroughly by CMU.  Eventually, it will be migrated to use
POSIX pthreads, which are documented in a lot of places.
@c Thomas, 26-03-1998

@c FIXME: it would be nice if we referred specifically to some of
@c the places in which POSIX pthreads are documented.
@c  tb: yes, but alas we are only allowed to refer to free
@c  documentation, and IEEE Posix ain't that... ;-(

Every single library in the Hurd distribution (including the GNU C
library) is completely thread-safe, and the Hurd servers themselves are
aggressively multithreaded.


@node Ports Library
@section Ports Library
@scindex libports
@scindex ports.h

Ports are communication channels that are held by the kernel.

A port has separate send rights and receive rights, which may be
transferred from task to task via the kernel.  Port rights are similar
to Unix file descriptors: they are per-task integers which are used to
identify ports when making kernel calls.  Send rights are required in
order to send an RPC request down a port, and receive rights are
required to serve the RPC request.  Receive rights may be aggregated
into a single @dfn{portset}, which serve as useful organizational units.

In a single-threaded RPC client, managing and categorizing ports is not
a difficult process.  However, in a complex multithreaded server, it is
useful to have a more abstract interface to managing portsets, as well
as maintaining server metadata.

The Hurd ports library, @code{libports}, fills that need.  The
@code{libports} functions are declared in @code{<hurd/ports.h>}.

@menu
* Buckets and Classes::         Basic units of port organization.
* Port Rights::                 Moving port rights to and from @code{libports}.
* Port Metadata::               Managing port-related information.
* Port References::             Guarding against leaks and lossage.
* RPC Management::              Locking and interrupting RPC operations.
@end menu

@node Buckets and Classes
@subsection Buckets and Classes

The @code{libports} @dfn{bucket} is simply a port set, with some
metadata and a lock.  All of the @code{libports} functions operate on
buckets.

@deftypefun {struct port_bucket *} ports_create_bucket (void)
Create and return a new, empty bucket.
@end deftypefun

A port @dfn{class} is a collection of individual ports, which can be
manipulated conveniently, and have enforced deallocation routines.
Buckets and classes are entirely orthogonal: there is no requirement
that all the ports in a class be in the same bucket, nor is there a
requirement that all the ports in a bucket be in the same class.

@deftypefun {struct port_class} ports_create_class (@w{void (*@var{clean_routine}) (void *@var{port})}, @w{void (*@var{dropweak_routine}) (void *@var{port})})
Create and return a new port class.  If nonzero, @var{clean_routine}
will be called for each allocated port object in this class when it is
being destroyed.  If nonzero, @var{dropweak_routine} will be called to
request weak references to be dropped.  (If @var{dropweak_routine} is
null, then weak references and hard references will be identical for
ports of this class.)
@end deftypefun

Once you have created at least one bucket and class, you may create new
ports, and store them in those buckets.  There are a few different
functions for port creation, depending on your application's
requirements:

@deftypefun error_t ports_create_port (@w{struct port_class *@var{class}}, @w{struct port_bucket *@var{bucket}}, @w{size_t @var{size}}, @w{void *@var{result}})
Create and return in @var{result} a new port in @var{class} and
@var{bucket}; @var{size} bytes will be allocated to hold the port
structure and whatever private data the user desires.
@end deftypefun

@deftypefun error_t ports_create_port_noinstall (@w{struct port_class *@var{class}}, @w{struct port_bucket *@var{bucket}}, @w{size_t @var{size}}, @w{void *@var{result}})
Just like @code{ports_create_port}, except don't actually put the port
into the portset underlying @var{bucket}.  This is intended to be used
for cases where the port right must be given out before the port is
fully initialized; with this call you are guaranteed that no RPC service
will occur on the port until you have finished initializing it and
installed it into the portset yourself.
@end deftypefun

@deftypefun error_t ports_import_port (@w{struct port_class *@var{class}}, @w{struct port_bucket *@var{bucket}}, @w{mach_port_t @var{port}}, @w{size_t @var{size}}, @w{void *@var{result}})
For an existing @emph{receive} right, create and return in @var{result}
a new port structure; @var{bucket}, @var{size}, and @var{class} args are
as for @code{ports_create_port}.
@end deftypefun


@node Port Rights
@subsection Port Rights

The following functions move port receive rights to and from the port
structure:

@deftypefun void ports_reallocate_port (@w{void *@var{port}})
Destroy the receive right currently associated with @var{port} and
allocate a new one.
@end deftypefun

@deftypefun void ports_reallocate_from_external (@w{void *@var{port}}, @w{mach_port_t @var{receive}})
Destroy the receive right currently associated with @var{port} and
designate @var{receive} as the new one.
@end deftypefun

@deftypefun void ports_destroy_right (@w{void *@var{port}})
Destroy the receive right currently associated with @var{port}.  After
this call, @code{ports_reallocate_port} and
@code{ports_reallocate_from_external} may not be used.
@end deftypefun

@deftypefun mach_port_t ports_claim_right (@w{void *@var{port}})
Return the receive right currently associated with @var{port}.  The
effects on @var{port} are the same as in @code{ports_destroy_right},
except that the receive right itself is not affected.  Note that in
multi-threaded servers, messages might already have been dequeued for
this port before it gets removed from the portset; such messages will
get @code{EOPNOTSUPP} errors.
@end deftypefun

@deftypefun error_t ports_transfer_right (@w{void *@var{topt}}, @w{void *@var{frompt}})
Transfer the receive right from @var{frompt} to @var{topt}.
@var{frompt} ends up with a destroyed right (as if
@code{ports_destroy_right} were called) and @var{topt}'s old right is
destroyed (as if @code{ports_reallocate_from_external} were called).
@end deftypefun

@deftypefun mach_port_t ports_get_right (@w{void *@var{port}})
Return the name of the receive right associated with @var{port}.  The
user is responsible for creating an ordinary send right from this name.
@end deftypefun


@node Port Metadata
@subsection Port Metadata

It is important to point out that the @var{port} argument to each of
the @code{libports} functions is a @code{void *} and not a @code{struct
port_info *}.  This is done so that you may add arbitrary
meta-information to your @code{libports}-managed ports.  Simply define
your own structure whose first element is a @code{struct port_info}, and
then you can use pointers to these structures as the @var{port} argument
to any @code{libports} function.

The following functions are useful for maintaining metadata that is
stored in your own custom ports structure:

@deftypefun {void *} ports_lookup_port (@w{struct port_bucket *@var{bucket}}, @w{mach_port_t @var{port}}, @w{struct port_class *@var{class}})
Look up @var{port} and return the associated port structure, allocating
a reference.  If the call fails, return zero.  If @var{bucket} is nonzero,
then it specifies a bucket to search; otherwise all buckets will be
searched.  If @var{class} is nonzero, then the lookup will fail if
@var{port} is not in @var{class}.
@end deftypefun

@deftypefun error_t ports_bucket_iterate (@w{struct port_bucket *@var{bucket}}, @w{error_t (*@var{fun}) (void *@var{port})})
Call @var{fun} once for each port in @var{bucket}.  No guarantee is made
about the order of iteration, which might vary from call to call.  If
FUN returns an error, then no further calls to FUN are made for any
remaining ports, and the return value of FUN is returned from
ports_bucket_iterate.
@end deftypefun

@node Port References
@subsection Port References

These functions maintain references to ports so that the port
information structures may be freed if and only if they are no longer
needed.  It is your responsibility to tell @code{libports} when
references to ports change.

@deftypefun void ports_port_ref (@w{void *@var{port}})
Allocate a hard reference to @var{port}.
@end deftypefun

@deftypefun void ports_port_deref (@w{void *@var{port}})
Drop a hard reference to @var{port}.
@end deftypefun

@deftypefun void ports_no_senders (@w{void *@var{port}}, @w{mach_port_mscount_t @var{mscount}})
The user is responsible for listening for no senders notifications; when
one arrives, call this routine for the @var{port} the message was sent
to, providing the @var{mscount} from the notification.
@end deftypefun

@deftypefun int ports_count_class (@w{struct port_class *@var{class}})
Block creation of new ports in @var{class}.  Return the number of ports
currently in @var{class}.
@end deftypefun

@deftypefun int ports_count_bucket (@w{struct port_bucket *@var{bucket}})
Block creation of new ports in @var{bucket}.  Return the number of ports
currently in @var{bucket}.
@end deftypefun

@deftypefun void ports_enable_class (@w{struct port_class *@var{class}})
Permit suspended port creation (blocked by @code{ports_count_class}) to
continue.
@end deftypefun

@deftypefun void ports_enable_bucket (@w{struct port_bucket *@var{bucket}})
Permit suspended port creation (blocked by @code{ports_count_bucket}) to
continue.
@end deftypefun

Weak references are not often used, as they are the same as hard
references for port classes where @var{dropweak_routine} is null.
@xref{Buckets and Classes}.

@deftypefun void ports_port_ref_weak (@w{void *@var{port}})
Allocate a weak reference to @var{port}.
@end deftypefun

@deftypefun void ports_port_deref_weak (@w{void *@var{port}})
Drop a weak reference to @var{port}.
@end deftypefun


@node RPC Management
@subsection RPC Management

The rest of the @code{libports} functions are dedicated to controlling
RPC operations.  These functions help you do all the locking and thread
cancellations that are required in order to build robust servers.

@deftypefn {Typedef} typedef int (*ports_demuxer_type) (@w{mach_msg_header_t *@var{inp}}, @w{mach_msg_header_t *@var{outp}})
Type of MiG demuxer routines.
@end deftypefn

@c FIXME: Should I know what `MiG' means?
@c  tb: Yeah, it's the Mach Interface Generator.

@deftypefun error_t ports_begin_rpc (@w{void *@var{port}}, @w{mach_msg_id_t @var{msg_id}}, @w{struct rpc_info *@var{info}})
Call this when an RPC is beginning on @var{port}.  @var{info} should be
allocated by the caller and will be used to hold dynamic state.  If this
RPC should be abandoned, return @code{EDIED}; otherwise we return zero.
@end deftypefun

@deftypefun void ports_end_rpc (@w{void *@var{port}}, @w{struct rpc_info *@var{info}})
Call this when an RPC is concluding.  The arguments must match the ones
passed to the paired call to @code{ports_begin_rpc}.
@end deftypefun

@deftypefun void ports_manage_port_operations_one_thread (@w{struct port_bucket *@var{bucket}}, @w{ports_demuxer_type @var{demuxer}}, @w{int @var{timeout}})
Begin handling operations for the ports in @var{bucket}, calling
@var{demuxer} for each incoming message.  Return if @var{timeout} is
nonzero and no messages have been received for @var{timeout}
milliseconds.  Use only one thread (the calling thread).
@end deftypefun

@deftypefun void ports_manage_port_operations_multithread (@w{struct port_bucket *@var{bucket}}, @w{ports_demuxer_type @var{demuxer}}, @w{int @var{thread_timeout}}, @w{int @var{global_timeout}}, @w{void (*@var{hook}) (void)})
Begin handling operations for the ports in @var{bucket}, calling
@var{demuxer} for each incoming message.  Return if @var{global_timeout}
is nonzero and no messages have been received for @var{global_timeout}
milliseconds.  Create threads as necessary to handle incoming messages
so that no port is starved because of sluggishness on another port.  If
@var{thread_timeout} is nonzero, then individual threads will die off
if they handle no incoming messages for @var{local_timeout}
milliseconds.  If non-null, @var{hook} will be called in each new thread
immediately after it is created.
@end deftypefun

@deftypefun error_t ports_inhibit_port_rpcs (@w{void *@var{port}})
Interrupt any pending RPC on @var{port}.  Wait for all pending RPCs to
finish, and then block any new RPCs starting on that port.
@end deftypefun

@deftypefun error_t ports_inhibit_class_rpcs (@w{struct port_class *@var{class}})
Similar to @code{ports_inhibit_port_rpcs}, but affects all ports in
@var{class}.
@end deftypefun

@deftypefun error_t ports_inhibit_bucket_rpcs (@w{struct port_bucket *@var{bucket}})
Similar to @code{ports_inhibit_port_rpcs}, but affects all ports in
@var{bucket}.
@end deftypefun

@deftypefun error_t ports_inhibit_all_rpcs (void)
Similar to @code{ports_inhibit_port_rpcs}, but affects all ports
whatsoever.
@end deftypefun

@deftypefun void ports_resume_port_rpcs (@w{void *@var{port}})
Reverse the effect of a previous @code{ports_inhibit_port_rpcs} for this
@var{port}, allowing blocked RPCs to continue.
@end deftypefun

@deftypefun void ports_resume_class_rpcs (@w{struct port_class *@var{class}})
Reverse the effect of a previous @code{ports_inhibit_class_rpcs} for
@var{class}.
@end deftypefun

@deftypefun void ports_resume_bucket_rpcs (@w{struct port_bucket *@var{bucket}})
Reverse the effect of a previous @code{ports_inhibit_bucket_rpcs} for
@var{bucket}.
@end deftypefun

@deftypefun void ports_resume_all_rpcs (void)
Reverse the effect of a previous @code{ports_inhibit_all_rpcs}.
@end deftypefun

@deftypefun void ports_interrupt_rpcs (@w{void *@var{port}})
Cancel (with @code{thread_cancel}) any RPCs in progress on @var{port}.
@end deftypefun

@deftypefun int ports_self_interrupted (void)
If the current thread's RPC has been interrupted with
@code{ports_interrupt_rpcs}, return nonzero and clear the interrupted
flag.
@end deftypefun

@deftypefun error_t ports_interrupt_rpc_on_notification (@w{void *@var{object}}, @w{struct rpc_info *@var{rpc}}, @w{mach_port_t @var{port}}, @w{mach_msg_id_t @var{what}})
Arrange for @code{hurd_cancel} to be called on @var{rpc}'s thread if
@var{object} gets notified that any of the things in @var{what} have
happened to @var{port}.  @var{rpc} should be an RPC on @var{object}.
@end deftypefun

@deftypefun error_t ports_interrupt_self_on_notification (@w{void *@var{object}}, @w{mach_port_t @var{port}}, @w{mach_msg_id_t @var{what}})
Arrange for @code{hurd_cancel} to be called on the current thread, which
should be an RPC on @var{object}, if @var{port} gets notified with the
condition @var{what}.
@end deftypefun

@deftypefun error_t ports_interrupt_self_on_port_death (@w{void *@var{object}}, @w{mach_port_t @var{port}})
Same as calling @code{ports_interrupt_self_on_notification} with
@var{what} set to @code{MACH_NOTIFY_DEAD_NAME}.
@end deftypefun

@deftypefun void ports_interrupt_notified_rpcs (@w{void *@var{object}}, @w{mach_port_t @var{port}}, @w{mach_msg_id_t @var{what}})
Interrupt any RPCs on @var{object} that have requested such.
@end deftypefun

@deftypefun void ports_dead_name (@w{void *@var{object}}, @w{mach_port_t @var{port}})
Same as calling @code{ports_interrupt_notified_rpcs} with @var{what} set
to @code{MACH_NOTIFY_DEAD_NAME}.
@end deftypefun


@node Integer Hash Library
@section Integer Hash Library
@scindex libihash
@scindex ihash.h

@code{libihash} provides integer-keyed hash tables, for arbitrary
element data types.  Such hash tables are frequently used when
implementing sparse arrays or buffer caches.

The following functions are declared in @code{<hurd/ihash.h>}:

@deftypefun error_t ihash_create (@w{ihash_t *@var{ht}})
Create an integer hash table and return it in @var{ht}.  If a memory
allocation error occurs, @code{ENOMEM} is returned, otherwise zero.
@end deftypefun

@deftypefun void ihash_free (@w{ihash_t @var{ht}})
Free @var{ht} and all resources it consumes.
@end deftypefun

@deftypefun void ihash_set_cleanup (@w{ihash_t @var{ht}}, @w{void (*@var{cleanup}) (void *@var{value}, void *@var{arg})}, @w{void *@var{arg}})
Sets @var{ht}'s element cleanup function to @var{cleanup}, and its
second argument to @var{arg}.  @var{cleanup} will be called on every
element @var{value} to be subsequently overwritten or deleted, with
@var{arg} as the second argument.
@end deftypefun

@deftypefun error_t ihash_add (@w{ihash_t @var{ht}}, @w{int @var{id}}, @w{void *@var{item}}, @w{void ***@var{locp}})
Add @var{item} to the hash table @var{ht} under the integer key
@var{id}.  @var{locp} is the address of a pointer located in @var{item};
If non-null, @var{locp} should point to a variable of type @code{void
**}, and will be filled with a pointer that may be used as an argument
to @code{ihash_locp_remove}.  The variable pointed to by @var{locp} may
be overwritten sometime between this call and when the element is
deleted, so you cannot stash its value elsewhere and hope to use the
stashed value with @code{ihash_locp_remove}.  If a memory allocation
error occurs, @code{ENOMEM} is returned, otherwise zero.
@end deftypefun

@deftypefun {void *} ihash_find (@w{ihash_t @var{ht}}, @w{int @var{id}})
Find and return the item in hash table @var{ht} with key @var{id}.
Returns null if the specified item doesn't exist.
@end deftypefun

@deftypefun error_t ihash_iterate (@w{ihash_t @var{ht}}, @w{error_t (*@var{fun}) (void *@var{value})})
Call function @var{fun} on every element of @var{ht}.  @var{fun}'s only
arg, @var{value}, is a pointer to the value stored in the hash table.  If
@var{fun} ever returns nonzero, then iteration stops and
@code{ihash_iterate} returns that value, otherwise it (eventually)
returns 0.
@end deftypefun

@deftypefun int ihash_remove (@w{ihash_t @var{ht}}, @w{int @var{id}})
Remove the entry with a key of @var{id} from @var{ht}.  If there was no
such element, then return zero, otherwise nonzero.
@end deftypefun

@deftypefun void ihash_locp_remove (@w{ihash_t @var{ht}}, @w{void **@var{ht_locp}})
Remove the entry at @var{locp} from the hashtable @var{ht}.  @var{locp}
is as returned from an earlier call to @code{ihash_add}.  This call
should be faster than @code{ihash_remove}.  @var{ht} can be null, in
which case the call still succeeds, but no cleanup is done.
@end deftypefun


@node Misc Library
@section Misc Library
@scindex libshouldbeinlibc

The GNU C library is constantly developing to meet the needs of the
Hurd.  However, because the C library needs to be very stable, it is
irresponsible to add new functions to it without carefully specifying
their interface, and testing them thoroughly.

The Hurd distribution includes a library called
@code{libshouldbeinlibc}, which serves as a proving ground for additions
to the GNU C library.  This library is in flux, as some functions are
added to it by the Hurd developers and others are moved to the official
C library.

These functions aren't currently documented (other than in their header
files), but complete documentation will be added to
@iftex
@emph{The GNU C Library Reference Manual}
@end iftex
@ifinfo
@ref{Top, The GNU C Library Reference Manual,, libc},
@end ifinfo
when these functions become part of the GNU C library.


@node Bug Address Library
@section Bug Address Library
@scindex libhurdbugaddr

@code{libhurdbugaddr} exists only to define a single variable:

@deftypevar {char *} argp_program_bug_address
@code{argp_program_bug_address} is the default Hurd bug-reporting e-mail
address, @email{bug-hurd@@gnu.org}.  This address is displayed to the
user when any of the standard Hurd servers and utilities are invoked
using the @samp{--help} option.
@end deftypevar


@node Input and Output
@chapter Input and Output

There are no specific programs or servers associated with the I/O
subsystem, since it is used to interact with almost all servers in the
GNU Hurd.  It provides facilities for reading and writing I/O channels,
which are the underlying implementation of file and socket descriptors
in the GNU C library.

@menu
* Iohelp Library::              I/O authentication and lock management.
* Pager Library::               Implementing multithreaded external pagers.
* I/O Interface::               RPC-based input/output channels.
@end menu

@node Iohelp Library
@section Iohelp Library
@scindex libiohelp
@scindex iohelp.h

The @code{<hurd/iohelp.h>} file declares several functions which are
useful for low-level I/O implementations.  Most Hurd servers do not call
these functions directly, but they are used by several of the Hurd
filesystem and networking helper libraries.  @code{libiohelp} requires
@code{libthreads}.

@menu
* I/O Users::                   User authentication management.
* Conch Management::            Deprecated shared I/O implementation.
@end menu

@node I/O Users
@subsection I/O Users

Most I/O servers need to implement some kind of user authentication
checking.  In order to facilitate that process, @code{libiohelp} has
some functions which encapsulate a set of idvecs (FIXME: xref to C
library) in a single @code{struct iouser}.

@deftypefun {struct iouser *} iohelp_create_iouser (@w{struct idvec *@var{uids}}, @w{struct idvec *@var{gids}})
Create a new @var{iouser} for the specified @var{uids} and @var{gids}.
@end deftypefun

@deftypefun {struct iouser *} iohelp_dup_iouser (@w{struct iouser *@var{iouser}})
Return a copy of @var{iouser}.
@end deftypefun

@deftypefun void iohelp_free_iouser (@w{struct iouser *@var{iouser}})
Release a reference to @var{iouser}.
@end deftypefun

I/O reauthentication is a rather complex protocol involving the
authserver as a trusted third party (@pxref{Auth Protocol}).  In order
to reduce the risk of flawed implementations, I/O reauthentication is
encapsulated in the @code{iohelp_reauth} function:

@deftypefun {struct iouser *} iohelp_reauth (@w{auth_t @var{authserver}}, @w{mach_port_t @var{rend_port}}, @w{mach_port_t @var{newright}}, @w{int @var{permit_failure}})
Conduct a reauthentication transaction, and return a new @var{iouser}.
@var{authserver} is the I/O server's auth port.  The rendezvous port
provided by the user is @var{rend_port}.

If the transaction cannot be completed, return zero, unless
@var{permit_failure} is nonzero.  If @var{permit_failure} is nonzero,
then should the transaction fail, return an @var{iouser} that has no
ids.  The new port to be sent to the user is @var{newright}.
@end deftypefun


@node Conch Management
@subsection Conch Management

@cindex conch
@findex iohelp_initialize_conch
@findex iohelp_handle_io_get_conch
@findex iohelp_get_conch
@findex iohelp_handle_io_release_conch
@findex iohelp_verify_user_conch
@findex iohelp_fetch_shared_data
@findex iohelp_put_shared_data
The @dfn{conch} is at the heart of the shared memory I/O system.
Several Hurd libraries implement shared I/O, and so @code{libiohelp}
contains functions to facilitate conch management.

Everything about shared I/O is undocumented because it is not needed for
adequate performance, and the RPC interface is simpler (@pxref{I/O
Interface}).  It is not useful for new libraries or servers to implement
shared I/O.


@node Pager Library
@section Pager Library
@scindex libpager
@scindex pager.h

@cindex XP (external pager)
@cindex external pager (XP)
The @dfn{external pager} (@dfn{XP}) microkernel interface allows
applications to provide the backing store for a memory object, by
converting hardware page faults into RPC requests.  External pagers are
required for memory-mapped I/O (@pxref{Mapped Data}) and stored
filesystems (@pxref{Stored Filesystems}).

The external pager interface is quite complex, so the Hurd pager library
contains functions which aid in creating multithreaded external pagers.
@code{libpager} is declared in @code{<hurd/pager.h>}, and requires only
the threads and ports libraries.

@menu
* Pager Management::            High-level interface to external pagers.
* Pager Callbacks::             Functions that the user must define.
@end menu


@node Pager Management
@subsection Pager Management

The pager library defines the @code{struct pager} data type in order to
represent a multi-threaded pager.  The general procedure for creating a
pager is to define the functions listed in @ref{Pager Callbacks},
allocate a @code{libports} bucket for the ports which will access the
pager, and create at least one new @code{struct pager} with
@code{pager_create}.

@deftypefun {struct pager *} pager_create (@w{struct user_pager_info *@var{u_pager}}, @w{struct port_bucket *@var{bucket}}, @w{boolean_t @var{may_cache}}, @w{memory_object_copy_strategy_t @var{copy_strategy}})
Create a new pager.  The pager will have a port created for it (using
@code{libports}, in @var{bucket}) and will be immediately ready to
receive requests.  @var{u_pager} will be provided to later calls to
@code{pager_find_address}.  The pager will have one user reference
created.  @var{may_cache} and @var{copy_strategy} are the original
values of those attributes as for @code{memory_object_ready}.  Users may
create references to pagers by use of the relevant ports library
functions.  On errors, return null and set @code{errno}.
@end deftypefun

Once you are ready to turn over control to the pager library, you should
call @code{ports_manage_port_operations_multithread} on the
@var{bucket}, using @code{pager_demuxer} as the ports @var{demuxer}.
This will handle all external pager RPCs, invoking your pager callbacks
when necessary.

@deftypefun int pager_demuxer (@w{mach_msg_header_t *@var{inp}}, @w{mach_msg_header_t *@var{outp}})
Demultiplex incoming @code{libports} messages on pager ports.
@end deftypefun

The following functions are the body of the pager library, and provide a
clean interface to pager functionality:

@deftypefun void pager_sync (@w{struct pager *@var{pager}}, @w{int @var{wait}})
@deftypefunx void pager_sync_some (@w{struct pager *@var{pager}}, @w{vm_address_t @var{start}}, @w{vm_size_t @var{len}}, @w{int @var{wait}})
Write data from pager @var{pager} to its backing store.  Wait for all
the writes to complete if and only if @var{wait} is set.

@code{pager_sync} writes all data; @code{pager_sync_some} only writes
data starting at @var{start}, for @var{len} bytes.
@end deftypefun

@deftypefun void pager_flush (@w{struct pager *@var{pager}}, @w{int @var{wait}})
@deftypefunx void pager_flush_some (@w{struct pager *@var{pager}}, @w{vm_address_t @var{start}}, @w{vm_size_t @var{len}}, @w{int @var{wait}})
Flush data from the kernel for pager @var{pager} and force any pending
delayed copies.  Wait for all pages to be flushed if and only if
@var{wait} is set.

@code{pager_flush} flushes all data; @code{pager_flush_some} only
flushes data starting at @var{start}, for @var{len} bytes.
@end deftypefun

@deftypefun void pager_return (@w{struct pager *@var{pager}}, @w{int @var{wait}})
@deftypefunx void pager_return_some (@w{struct pager *@var{pager}}, @w{vm_address_t @var{start}}, @w{vm_size_t @var{len}}, @w{int @var{wait}})
Flush data from the kernel for pager @var{pager} and force any pending
delayed copies.  Wait for all pages to be flushed if and only if
@var{wait} is set.  Have the kernel write back modifications.

@code{pager_return} flushes and restores all data;
@code{pager_return_some} only flushes and restores data starting at
@var{start}, for @var{len} bytes.
@end deftypefun

@deftypefun void pager_offer_page (@w{struct pager *@var{pager}}, @w{int @var{precious}}, @w{int @var{writelock}}, @w{vm_offset_t @var{page}}, @w{vm_address_t @var{buf}})
Offer a page of data to the kernel.  If @var{precious} is set, then this
page will be paged out at some future point, otherwise it might be
dropped by the kernel.  If the page is currently in core, the kernel
might ignore this call.
@end deftypefun

attributes@deftypefun void pager_change_attributes (@w{struct pager *@var{pager}}, @w{boolean_t @var{may_cache}}, @w{memory_object_copy_strategy_t @var{copy_strategy}}, @w{int @var{wait}})
Change the attributes of the memory object underlying pager @var{pager}.
The @var{may_cache} and @var{copy_strategy} arguments are as for
@code{memory_object_change_}.  Wait for the kernel to report
completion if and only if @var{wait} is set.
@end deftypefun

@deftypefun void pager_shutdown (@w{struct pager *@var{pager}})
Force termination of a pager.  After this returns, no more paging
requests on the pager will be honoured, and the pager will be
deallocated.  The actual deallocation might occur asynchronously if
there are currently outstanding paging requests that will complete
first.
@end deftypefun

@deftypefun error_t pager_get_error (@w{struct pager *@var{p}}, @w{vm_address_t @var{addr}})
Return the error code of the last page error for pager @var{p} at
address @var{addr}.@footnote{Note that this function will be deleted
when the Mach pager interface is fixed to provide this information.}
@end deftypefun

@deftypefun error_t pager_memcpy (@w{struct pager *@var{pager}}, @w{memory_object_t @var{memobj}}, @w{vm_offset_t @var{offset}}, @w{void *@var{other}}, @w{size_t *@var{size}}, @w{vm_prot_t @var{prot}})
Try to copy @code{*@var{size}} bytes between the region @var{other}
points to and the region at @var{offset} in the pager indicated by
@var{pager} and @var{memobj}.  If @var{prot} is @code{VM_PROT_READ},
copying is from the pager to @var{other}; if @var{prot} contains
@code{VM_PROT_WRITE}, copying is from @var{other} into the pager.
@code{*@var{size}} is always filled in with the actual number of bytes
successfully copied.  Returns an error code if the pager-backed memory
faults; if there is no fault, returns zero and @code{*@var{size}} will
be unchanged.
@end deftypefun

These functions allow you to recover the internal @code{struct pager}
state, in case the @code{libpager} interface doesn't provide an
operation you need:

@deftypefun {struct user_pager_info *} pager_get_upi (@w{struct pager *@var{p}})
Return the @code{struct user_pager_info} associated with a pager.
@end deftypefun

@deftypefun mach_port_t pager_get_port (@w{struct pager *@var{pager}})
Return the port (receive right) for requests to the pager.  It is
absolutely necessary that a new send right be created from this receive
right.
@end deftypefun


@node Pager Callbacks
@subsection Pager Callbacks

Like several other Hurd libraries, @code{libpager} depends on you to
implement application-specific callback functions.  You @emph{must}
define the following functions:

@deftypefun error_t pager_read_page (@w{struct user_pager_info *@var{pager}}, @w{vm_offset_t @var{page}}, @w{vm_address_t *@var{buf}}, @w{int *@var{write_lock}})
For pager @var{pager}, read one page from offset @var{page}.  Set
@code{*@var{buf}} to be the address of the page, and set
@code{*@var{write_lock}} if the page must be provided read-only.  The
only permissible error returns are @code{EIO}, @code{EDQUOT}, and
@code{ENOSPC}.
@end deftypefun

@deftypefun error_t pager_write_page (@w{struct user_pager_info *@var{pager}}, @w{vm_offset_t @var{page}}, @w{vm_address_t @var{buf}})
For pager @var{pager}, synchronously write one page from @var{buf} to
offset @var{page}.  In addition, @code{vm_deallocate} (or equivalent)
@var{buf}.  The only permissible error returns are @code{EIO},
@code{EDQUOT}, and @code{ENOSPC}.
@end deftypefun

@deftypefun error_t pager_unlock_page (@w{struct user_pager_info *@var{pager}}, @w{vm_offset_t @var{address}})
A page should be made writable.
@end deftypefun

@deftypefun error_t pager_report_extent (@w{struct user_pager_info *@var{pager}}, @w{vm_address_t *@var{offset}}, @w{vm_size_t *@var{size}})
This function should report in @code{*@var{offset}} and
@code{*@var{size}} the minimum valid address the pager will accept and
the size of the object.
@end deftypefun

@deftypefun void pager_clear_user_data (@w{struct user_pager_info *@var{pager}})
This is called when a pager is being deallocated after all extant send
rights have been destroyed.
@end deftypefun

@deftypefun void pager_dropweak (@w{struct user_pager_info *@var{p}})
This will be called when the ports library wants to drop weak
references.  The pager library creates no weak references itself, so if
the user doesn't either, then it is all right for this function to do
nothing.
@end deftypefun


@node I/O Interface
@section I/O Interface
@scindex io.defs

The I/O interface facilities are described in @code{<hurd/io.defs>}.
This section discusses only RPC-based I/O operations.@footnote{The
latter portion of @code{<hurd/io.defs>} and all of
@code{<hurd/shared.h>} describe how to implement shared-memory I/O
operations.  However, shared I/O has been deprecated.  @xref{Conch
Management}, for more details.}

@menu
* I/O Object Ports::            How ports to I/O objects work.
* Simple Operations::           Read, write, and seek.
* Open Modes::                  State bits that affect pieces of operation.
* Asynchronous I/O::            How to be notified when I/O is possible.
* Information Queries::         How to implement @code{io_stat} and
                                  @code{io_server_version}.
* Mapped Data::                 Getting memory objects referring to the
                                  data of an I/O object.
@end menu

@node I/O Object Ports
@subsection I/O Object Ports

The I/O server must associate each I/O port with a particular set of
uids and gids, identifying the user who is responsible for operations on
the port.  Every port to an I/O server should also support either the
file protocol (@pxref{File Interface}) or the socket protocol
(@pxref{Socket Interface}); naked I/O ports are not allowed.

In addition, the server associates with each port a default file
pointer, a set of open mode bits, a pid (called the ``owner''), and some
underlying object which can absorb data (for write) or provide data (for
read).

The uid and gid sets associated with a port may not be visibly shared
with other ports, nor may they ever change.  The server must fix the
identification of a set of uids and gids with a particular port at the
moment of the port's creation.  The other characteristics of an I/O port
may be shared with other users.  The I/O server interface does not
generally specify the way in which servers may share these other
characteristics (with the exception of the deprecated
@code{O_ASYNC} interface); however, the file and socket interfaces make
further requirements about what sharing is required and what sharing is prohibited.

In general, users get send rights to I/O ports by some mechanism that is
external to the I/O protocol.  (For example, fileservers give out I/O
ports in response to the @code{dir_lookup} and @code{fsys_getroot}
calls.  Socket servers give out ports in response to the
@code{socket_create} and @code{socket_accept} calls.)  However, the I/O
protocol provides methods of obtaining new ports that refer to the same
underlying object as another port.  In response to all of these calls,
all underlying state (including, but not limited to, the default file
pointer, open mode bits, and underlying object) must be shared between
the old and new ports.  In the following descriptions of these calls,
the term ``identical'' means this kind of sharing.  All these calls must
return send rights to a newly-constructed Mach port.

@c FIXME: should be say `Mach' above, or should we say
@c `microkernel'?
@c  tb: We say Mach.  Other kernels might have different rules, and we
@c  should document what we have now.

@findex io_duplicate
The @code{io_duplicate} call simply returns another port which is
identical to an existing port and has the same uid and gid set.

@findex io_restrict_auth
The @code{io_restrict_auth} call returns another port, identical to the
provided port, but which has a smaller associated uid and gid set.  The
uid and gid sets of the new port are the intersection of the set on the
existing port and the lists of uids and gids provided in the call.

@findex io_reauthenticate
Users use the @code{io_reauthenticate} call when they wish to have an
entirely new set of uids or gids associated with a port.  In response to
the @code{io_reauthenticate} call, the server must create a new port,
and then make the call @code{auth_server_authenticate} to the auth
server.  The rendezvous port for the @code{auth_server_authenticate}
call is the I/O port to which was made the @code{io_reauthenticate}
call.  The server provides the @var{rend_int} parameter to the auth
server as a copy from the corresponding parameter in the
@code{io_reauthenticate} call.  The I/O server also gives the auth
server a new port; this must be a newly created port identical to the
old port.  The authserver will return the set of uids and gids
associated with the user, and guarantees that the new port will go
directly to the user that possessed the associated authentication port.
The server then identifies the new port given out with the specified
ID's.

@node Simple Operations
@subsection Simple Operations

@findex io_write
Users write to I/O ports by calling the @code{io_write} RPC.  They
specify an @var{offset} parameter; if the object supports writing at
arbitrary offsets, the server should honour this parameter.  If @math{-1}
is passed as the offset, then the server should use the default file
pointer.  The server should return the amount of data which was
successfully written.  If the operation was interrupted after some but
not all of the data was written, then it is considered to have succeeded
and the server should return the amount written.  If the port is not an
I/O port at all, the server should reply with the error
@code{EOPNOTSUPP}.  If the port is an I/O port, but does not happen to
support writing, then the correct error is @code{EBADF}.

@findex io_read
Users read from I/O ports by calling the @code{io_read} RPC.  They
specify the amount of data they wish to read, and the offset.  The offset
has the same meaning as for @code{io_write} above.  The server should
return the data that was read.  If the call is interrupted after some
data has been read (and the operation is not idempotent) then the server
should return the amount read, even if it was less than the amount requested.
The server should return as much data as possible, but never more than
requested by the user.  If there is no data, but there might be later,
the call should block until data becomes available.  The server indicates
end-of-file by returning zero bytes.  If the call is
interrupted after some data has been read, but the call is idempotent,
then the server may return @code{EINTR} rather than actually filling the
buffer (taking care that any modifications of the default file pointer
have been reversed).  Preferably, however, servers should return data.

There are two categories of objects: seekable and non-seekable.
Seekable objects must accept arbitrary offset parameters in the
@code{io_read} and @code{io_write} calls, and must implement the
@code{io_seek} call.  Non-seekable objects must ignore the offset
parameters to @code{io_read} and @code{io_write}, and should return
@code{ESPIPE} to the @code{io_seek} call.

@c FIXME: should that last `should' be replaced with `must'?
@c  tb: maybe, but perhaps not.  There might be a reason to implement a
@c  semi-seekable object which permits some but not all of these
@c  operations.  In the case of the Hurd interfaces (as opposed to
@c  libraries) I like to be a little looser about this.  The rule is "do
@c  what the interface says unless you really understand it and have a
@c  good reason to do something different".

@findex io_seek
On seekable objects, @code{io_seek} changes the default file pointer for
reads and writes.  (@xref{File Positioning, , , libc, The GNU C Library
Reference Manual},
for the interpretation of the @var{whence} and @var{offset} arguments.)
It returns the new offset as modified by @code{io_seek}.

@findex io_readable
The @code{io_readable} interface returns the amount of data which can be
immediately read.  For the special technical meaning of ``immediately'',
see @ref{Asynchronous I/O}.

@node Open Modes
@subsection Open Modes

@findex io_set_all_openmodes
@findex io_get_openmodes
@findex io_set_some_openmodes
@findex io_clear_some_openmodes
The server associates each port with a set of bits that affect its
operation.  The @code{io_set_all_openmodes} call modifies these bits and
the @code{io_get_openmodes} call returns them.  In addition, the
@code{io_set_some_openmodes} and @code{io_clear_some_openmodes} do an
atomic read/modify/write of the openmodes.

The @code{O_APPEND} bit, when set, changes the behaviour of
@code{io_write} when it uses the default file pointer on seekable
objects.  When @code{io_write} is done on a port with the
@code{O_APPEND} bit set, is must set the file pointer to the current
file size before doing the write (which would then increment the file
pointer as usual).  The @dfn{current file size} is the smallest offset
which returns end-of-file when provided to @code{io_read}.  The server
must atomically bind this update to the actual data write with respect
to other users of @code{io_read}, @code{io_write}, and @code{io_seek}.

The @code{O_FSYNC} bit, when set, guarantees that @code{io_write} will
not return until data is fully written to the underlying medium.

The @code{O_NONBLOCK} bit, when set, prevents read and write from
blocking.  They should copy such data as is immediately available.  If
no data is immediately available they should return @code{EWOULDBLOCK}.

The definition of ``immediately'' is more or less server-dependent.
Some servers, notably stored filesystem servers (@pxref{Stored
Filesystems}), regard all data as immediately available.  The one
criterion is that something which must happen @dfn{immediately} may not
wait for any user-synchronizable event.

The @code{O_ASYNC} bit is deprecated; its use is documented in the
following section.  This bit must be shared between all users of the
same underlying object.


@node Asynchronous I/O
@subsection Asynchronous I/O

@findex io_async
Users may wish to be notified when I/O can be done without blocking;
they use the @code{io_async} call to indicate this to the server.  In
the @code{io_async} call the user provides a port on which will the
server should send @code{sig_post} messages as I/O becomes possible.
The server must return a port which will be the reference port in the
@code{sig_post} messages.  Each @code{io_async} call should generate a
new reference port.  (FIXME: xref the C library manual for information
on how to send sig_post messages.)

The server then sends one @code{SIGIO} signal to each registered async
user everytime I/O becomes possible.  I/O is possible if at least one
byte can be read or written immediately.  The definition of
``immediately'' must be the same as for the implementation of the
@code{O_NONBLOCK} flag (@pxref{Open Modes}).  In addition, every time a
user calls @code{io_read} or @code{io_write} on a non-seekable object, or at the
default file pointer on a seekable object, another signal should be sent
to each user if I/O is still possible.

Some objects may also define ``urgent'' conditions.  Such servers should
send the @code{SIGURG} signal to each registered async user anytime an
urgent condition appears.  After any RPC that has the possibility of
clearing the urgent condition, the server should again send the signal
to all registered users if the urgent condition is still present.

@findex io_select
A more fine-grained mechanism for doing async I/O is the
@code{io_select} call.  The user specifies the kind of access desired,
and a send-once right.  If I/O of the kind the user desires is
immediately possible, then the server should return so indicating, and
destroy the send-once right.  If I/O is not immediately possible, the
server should save the send-once right, and send a @code{select_done}
message as soon as I/O becomes immediately possible.  Again, the
definition of ``immediately'' must be the same for @code{io_select},
@code{io_async}, and @code{O_NONBLOCK} (@pxref{Open Modes}).

@findex io_mod_owner
@findex io_get_owner
@findex io_get_icky_async_id
For compatibility with 4.2 and 4.3 BSD, the I/O interface provides a
deprecated feature (known as @dfn{icky async I/O}).  The calls
@code{io_mod_owner} and @code{io_get_owner} set the ``owner'' of the
object, providing either a pid or a pgrp (if the value is negative).
This implies that only one process at a time can do icky I/O on a given
object.  Whenever the I/O server is sending @code{sig_post} messages to
all the @code{io_async} users, if the @code{O_ASYNC} bit is set, the
server should also send a signal to the owning pid/pgrp.  The ID port
for this call should be different from all the @code{io_async} ID ports
given to users.  Users may find out what ID port the server uses for
this by calling @code{io_get_icky_async_id}.

@node Information Queries
@subsection Information Queries

@findex io_stat
Users may call @code{io_stat} to find out information about the I/O
object.  Most of the fields of a @code{struct stat} are meaningful only
for files.  All objects, however, must support the fields
@var{st_fstype}, @var{st_fsid}, @var{st_ino}, @var{st_atime},
@var{st_atime_usec}, @var{st_mtime_user}, @var{st_ctime},
@var{st_ctime_usec}, and @var{st_blksize}.

@var{st_fstype}, @var{st_fsid}, and @var{st_ino} must be unique for
the underlying object across the entire system.

@var{st_atime} and @var{st_atime_usec} hold the seconds and
microseconds, respectively, of the system clock at the last time the
object was read with @code{io_read}.

@var{st_mtime} and @var{st_mtime_usec} hold the seconds and microseconds,
respectively, of the system clock at the last time the object was
written with @code{io_write}.

Other appropriate operations may update the @var{atime} and the
@var{mtime} as well; both the file and socket interfaces specify such
operations.

@var{st_ctime} and @var{st_ctime_usec} hold the seconds and
microseconds, respectively, of the system clock at the last time
permanent meta-data associated with the object was changed.  The exact
operations which cause such an update are server-dependent, but must
include the creation of the object.

The server is permitted to delay the actual update of these times until
stat is called; before the server stores the times on permanent media
(if it ever does so) it should update them if necessary.

@var{st_blksize} gives the optimal I/O size in bytes for @code{io_read}
and @code{io_write}; users should endeavor to read and write amounts
which are multiples of the optimal size, and to use offsets which are
multiples of the optimal size.

In addition, objects which are seekable should set @var{st_size} to the
current file size as in the description of the @code{O_APPEND} flag
(@pxref{Open Modes}).

The @var{st_uid} and @var{st_gid} fields are unrelated to the ``owner''
as described above for icky async I/O.

@findex io_server_version
Users may find out the version of the server they are talking to by
calling @code{io_server_version}; this should return strings and
integers describing the version number of the server, as well as its
name.

@node Mapped Data
@subsection Mapped Data

@findex io_map
Servers may optionally implement the @code{io_map} call.  The ports
returned by @code{io_map} must implement the external pager kernel
interface (@pxref{Pager Library}) and be suitable as arguments to
@code{vm_map}.

Seekable objects must allow access from zero up to (but not including)
the current file size as described for @code{O_APPEND} (@pxref{Open
Modes}).  Whether they provide access beyond such a point is
server-dependent; in addition, the meaning of accessing a non-seekable
object is server-dependent.


@node Files
@chapter Files

A file is traditionally thought of as a quantity of disk storage.  In
the Hurd, files are an extension of the I/O interface, but they do not
necessarily correspond to disk storage.

Every file in the Hurd is represented by a port, which is connected to
the server that manages the file.  When a client wants to operate on a
file, it makes RPC requests via a file port to its server process, which
is commonly called a @dfn{translator}.

@menu
* Translators::                 Extending the Hurd filesystem hierarchy.
* Trivfs Library::              Implementing single-file translators.
* Fshelp Library::              Miscellaneous generic filesystem routines.
* File Interface::              File ports implement the file interface.
* Filesystem Interface::        Translator control interface.
@end menu


@node Translators
@section Translators

The Hurd filesystem allows you to set translators on any file or
directory that you own.  A @dfn{translator} is any Hurd server which
provides the basic filesystem interface.  Translated nodes are somewhat
like a cross between Unix symbolic links and mount points.

Whenever a program tries to access the contents of a translated node,
the filesystem server redirects the request to the appropriate
translator (starting it if necessary).  Then, the new translator
services the client's request.  The GNU C library makes this behaviour
seamless from the client's perspective, so that standard Unix programs
behave correctly under the Hurd.

Translators run with the privileges of the translated node's
@emph{owner}, so they cannot be used to compromise the security of the
system.  This also means that @emph{any} user can write their own
translators, and provide other users with arbitrary
filesystem-structured data, regardless of the data's actual source.
Other chapters in this manual describe existing translators, and how you
can modify them or write your own.

The standard Hurd filesystem servers are constantly evolving to provide
innovative features that users want.  Here are a few examples of
existing translators:

@itemize @bullet
@item
Disk-based filesystem formats, such as @code{ext2fs}, @code{ufs}, and
@code{iso9660fs} (@pxref{Stored Filesystems}).

@item
Network filesystems, such as @code{nfs} and @code{ftpfs}
(@pxref{Distributed Filesystems}).

@item
Single files with dynamic content, such as FIXME: we need a good
example.

@item
@c FIXME: reword
Hurd servers which translate rendezvous filesystem nodes in standard
locations, so that other programs can easily find them and use
server-specific interfaces.  For example, @code{pflocal} implements the
filesystem interfaces, but it also provides a special Unix-domain socket
RPC interface (FIXME xref).  Programs can fetch a port to this
translator simply by calling @code{file_name_lookup} (FIXME xref) on
@file{/servers/socket/1}@footnote{The number 1 corresponds to the
@code{PF_LOCAL} C library socket domain constant.}, then use Unix
socket-specific RPCs on that port, rather than adhering to the file
protocol.
@end itemize

This section focuses on the generic programs that you need to understand
in order to use existing translators.  Many other parts of this manual
describe how you can write your own translators.

@menu
* Invoking settrans::           Declaring how a node should be translated.
* Invoking showtrans::          Displaying how nodes are translated.
* Invoking mount::              Unix-compatible active filesystem translators.
* Invoking fsysopts::           Modifying translation parameters at runtime.
@end menu


@node Invoking settrans
@subsection Invoking @code{settrans}
@pindex settrans

The @code{settrans} program allows you to set a translator on a file or
directory.  By default, the passive translator is set (see the
@samp{--passive} option).

The @code{settrans} program has the following synopsis:

@example
settrans [@var{option}]@dots{} @var{node} [@var{translator} @var{arg}@dots{}]
@end example

@noindent
where @var{translator} is the absolute filename of the new translator
program.  Each @var{arg} is passed to @var{translator} when it starts.
If @var{translator} is not specified, then @code{settrans} clears the
existing translator rather than setting a new one.

@code{settrans} accepts the following options:

@table @samp
@item -a
@itemx --active
Set @var{node}'s active translator.  @dfn{Active translators} are
started immediately and are not persistent: if the system is rebooted
then they are lost.

@item -c
@itemx --create
Create @var{node} as a zero-length file if it doesn't already exist.

@item -L
@itemx --dereference
If @var{node} is already translated, stack the new translator on top of
it (rather than replacing the existing translator).

@item --help
Display a brief usage message, then exit.

@item -p
@itemx --passive
Set @var{node}'s passive translator.  @dfn{Passive translators} are only
activated by the underlying filesystem when clients try to use the
@var{node}, and they shut down automatically after they are no longer
active in order to conserve system resources.

Passive translators are stored on the underlying filesystem media, and
so they persist between system reboots.  Not all filesystems support
passive translators, due to limitations in their underlying media.
Consult the filesystem-specific documentation to see if they are
supported.

If you are setting the passive translator, and @var{node} already has an
active translator, then the following options apply:

@table @samp
@item -g
@itemx --goaway
Tell the active translator to go away.  In this case, the following
additional options apply:

@table @samp
@item -f
@itemx --force
If the active translator doesn't go away, then force it.

@item -S
@itemx --nosync
Don't flush its contents to disk before terminating.

@item -R
@itemx --recursive
Shut down all of the active translator's children, too.
@end table


@item -k
@itemx --keep-active
Leave the existing active translator running.  The new translator will
not be started unless the active translator has stopped.
@end table

@item -P
@itemx --pause
When starting an active translator, prompt and wait for a newline on
standard input before completing the startup handshake.  This is useful
when debugging a translator, as it gives you time to start the debugger.

@item -t @var{sec}
@itemx --timeout=@var{sec}
If the translator does not start up in @var{sec} seconds (the default is
60), then return an error; if @var{sec} is 0, then never timeout.

@item --version
Output program version information and exit.

@item -x
@itemx --exclusive
Only set the translator if there is none already.
@end table


@node Invoking showtrans
@subsection Invoking @code{showtrans}

The @code{showtrans} program allows you to show the passive translator
setting on a file system node.

The @code{showtrans} program has the following synopsis:

@example
showtrans [@var{option}]@dots{} @var{file}@dots{}
@end example

@code{showtrans} accepts the following options:

@table @code
@item -p
@itemx --prefix
Always display @var{filename}: before translators.

@item -P
@itemx --no-prefix
Never display @var{filename}: before translators.
@item -s
@itemx --silent
No output; useful when checking error status.
@item -t
@itemx --translated
Only display files that have translators.
@end table


@node Invoking mount
@subsection Invoking @code{mount}


@node Invoking fsysopts
@subsection Invoking @code{fsysopts}

The @code{fsysopts} program allows you to retrieve or set command line
options for running translator @var{filesys}.

The @code{fsysopts} program has the following synopsis:

@example
fsysopts [@var{option}@dots{}] @var{filesys} [@var{fs_option}@dots{}]
@end example

@code{fsysopts} accepts the following options:

@table @code

@item -L
@itemx --dereference
If @var{filesys} is a symbolic link, follow it.

@item -R
@itemx --recursive
Pass these options to any child translators.
@end table

The legal values for @var{fs_option} depends on @var{filesys}, but
some common ones are:

@table @code
@item --readonly
@item --writable
@item --remount
@item --sync[=@var{interval}]
@item --nosync
@end table

If no options are supplied, @var{filesys}' current options are
printed.


@node Trivfs Library
@section Trivfs Library
@scindex libtrivfs
@scindex trivfs.h

Certain translators do not need to be very complex, because they
represent a single file rather than an entire directory hierarchy.  The
trivfs library, which is declared in @code{<hurd/trivfs.h>}, does most of
the work of implementing this kind of translator.  This library requires
the iohelp and ports libraries.

@menu
* Trivfs Startup::              Writing a simple trivfs-based translator.
* Trivfs Callbacks::            Mandatory user-defined trivfs functions.
* Trivfs Options::              Optional user-defined trivfs functions.
* Trivfs Ports::                Managing control and protid ports.
@end menu

@node Trivfs Startup
@subsection Trivfs Startup

In order to use the trivfs library, you will need to define the
appropriate callbacks (@pxref{Trivfs Callbacks}).  As with all Hurd
servers, your trivfs-based translator should first parse any
command-line options, in case the user is just asking for help.  Trivfs
uses argp (@pxref{Argp, , , libc, The GNU C Library Reference Manual})
for parsing command-line arguments.

Your translator should redefine the following functions and variables as
necessary, and then call @code{argp_parse} with the relevant arguments:

@deftypevar {extern struct argp *} trivfs_runtime_argp
If this is defined or set to an argp structure, it will be used by the
default @code{trivfs_set_options} to handle runtime options parsing.
Redefining this is the normal way to add option parsing to a trivfs
program.
@end deftypevar

@deftypefun error_t trivfs_set_options (@w{struct trivfs_control *@var{fsys}}, @w{char *@var{argz}}, @w{size_t @var{argz_len}})
Set runtime options for @var{fsys} to @var{argz} and @var{argz_len}.
The default definition for this routine simply uses
@var{trivfs_runtime_argp} (supplying @var{fsys} as the argp input
field).
@end deftypefun

@deftypefun error_t trivfs_append_args (@w{struct trivfs_control *@var{fsys}}, @w{char **@var{argz}}, @w{size_t *@var{argz_len}})
Append to the malloced string @code{*@var{argz}} of length
@code{*@var{argz_len}} a NUL-separated list of the arguments to this
translator.
@end deftypefun

@c FIXME: Shouldn't `NUL-separated', above, be changed to
@c `NUL-terminated' (or, as I prefer, `zero-terminated')?
@c  tb: no, it's a NUL-separated list.  Something like:
@c     "foo\0bar\0baz\0quux"

After your translator parses its command-line arguments, it should fetch
its bootstrap port by using @code{task_get_bootstrap_port}.  If this
port is @code{MACH_PORT_NULL}, then your program wasn't started as a
translator.  Otherwise, you can use the bootstrap port to create a new
control structure (and advertise its port) with @code{trivfs_startup}:

@deftypefun error_t trivfs_startup (@w{mach_port_t @var{bootstrap}}, @w{int @var{flags}}, @w{struct port_class *@var{control_class}}, @w{struct port_bucket *@var{control_bucket}}, @w{struct port_class *@var{protid_class}}, @w{struct port_bucket *@var{protid_bucket}}, @w{struct trivfs_control **@var{control}})
@deftypefunx error_t trivfs_create_control (@w{mach_port_t @var{bootstrap}}, @w{struct port_class *@var{control_class}}, @w{struct port_bucket *@var{control_bucket}}, @w{struct port_class *@var{protid_class}}, @w{struct port_bucket *@var{protid_bucket}}, @w{struct trivfs_control **@var{control}})
@code{trivfs_startup} creates a new trivfs control port, advertises it
to the underlying node @var{bootstrap} with @code{fsys_startup},
returning the results of this call, and places its control structure in
@code{*@var{control}}.  @code{trivfs_create_control} does the same
thing, except it doesn't advertise the control port to the underlying
node.  @var{control_class} and @var{control_bucket} are passed to
@code{libports} to create the control port, and @var{protid_class} and
@var{protid_bucket} are used when creating ports representing opens of
this node; any of these may be zero, in which case an appropriate port
class/bucket is created.  If @var{control} is non-null, the trivfs
control port is returned in it.  @var{flags} (a bitmask of the
appropriate @code{O_*} constants) specifies how to open the underlying
node.
@end deftypefun

If you did not supply zeros as the class and bucket arguments to
@code{trivfs_startup}, you will probably need to use the trivfs port
management functions (@pxref{Trivfs Ports}).

Once you have successfully called @code{trivfs_startup}, and have a
pointer to the control structure stored in, say, the @var{fsys}
variable, you are ready to call one of the
@code{ports_manage_port_operations_*} functions using
@code{@var{fsys}->pi.bucket} and @code{trivfs_demuxer}.  This will
handle any incoming filesystem requests, invoking your callbacks when
necessary.

@deftypefun int trivfs_demuxer (@w{mach_msg_header_t *@var{inp}}, @w{mach_msg_header_t *@var{outp}})
Demultiplex incoming @code{libports} messages on trivfs ports.
@end deftypefun

The following functions are not usually necessary, but they allow you to
use the trivfs library even when it is not possible to turn
message-handling over to @code{trivfs_demuxer} and @code{libports}:

@deftypefun {struct trivfs_control *} trivfs_begin_using_control (@w{mach_port_t @var{port}})
@deftypefunx {struct trivfs_protid *} trivfs_begin_using_protid (@w{mach_port_t @var{port}})
These functions can be used as @code{intran} functions for a MiG port
type to have the stubs called with either the control or protid pointer.
@end deftypefun

@c FIXME: `intran' needs to be explained, or else there needs to be
@c a cross-reference there.
@c  tb: `intran' is a keyword in MiG.

@deftypefun void trivfs_end_using_control (@w{struct trivfs_control *@var{port}})
@deftypefunx void trivfs_end_using_protid (@w{struct trivfs_protid *@var{port}})
These can be used as `destructor' functions for a MiG port type, to have
the stubs called with the control or protid pointer.
@end deftypefun

@deftypefun error_t trivfs_open (@w{struct trivfs_control *@var{fsys}}, @w{struct iouser *@var{user}}, @w{unsigned @var{flags}}, @w{mach_port_t @var{realnode}}, @w{struct trivfs_protid **@var{cred}})
Return a new protid (that is, a port representing an open of this node)
pointing to a new peropen in @var{cred}, with @var{realnode} as the
underlying node reference, with the given identity, and open flags in
@var{flags}.  @var{cntl} is the trivfs control object.
@end deftypefun

@deftypefun error_t trivfs_protid_dup (@w{struct trivfs_protid *@var{cred}}, @w{struct trivfs_protid **@var{dup}})
Return a duplicate of @var{cred} in @var{dup}, sharing the same peropen
and hook.  A non-null protid @var{hook} indicates that
@var{trivfs_peropen_create_hook} created this protid (@pxref{Trivfs
Options}).
@end deftypefun

@deftypefun error_t trivfs_set_atime (@w{struct trivfs_control *@var{cntl}})
@deftypefunx error_t trivfs_set_mtime (@w{struct trivfs_control *@var{cntl}})
Call these to set atime or mtime for the node to the current time.
@end deftypefun


@node Trivfs Callbacks
@subsection Trivfs Callbacks

Like several other Hurd libraries, @code{libtrivfs} requires that you
define a number of application-specific callback functions and
configuration variables.  You @emph{must} define the following variables
and functions:

@deftypevar {extern int} trivfs_fstype
@deftypevarx {extern int} trivfs_fsid
These variables are returned in the @var{st_fstype} and @var{st_fsid}
fields of @code{struct stat}.  @var{trivfs_fstype} should be chosen
from the @code{FSTYPE_*} constants found in @code{<hurd/hurd_types.h>}.
@end deftypevar

@deftypevar {extern int} trivfs_allow_open
Set this to some bitwise OR combination of @code{O_READ},
@code{O_WRITE}, and @code{O_EXEC}; trivfs will only allow opens of the
specified modes.
@end deftypevar

@deftypevar {extern int} trivfs_support_read
@deftypevarx {extern int} trivfs_support_write
@deftypevarx {extern int} trivfs_support_exec
Set these to nonzero if trivfs should allow read, write, or execute of
the file.  These variables are necessary because @var{trivfs_allow_open}
is used only to validate opens, not actual operations.
@end deftypevar

@deftypefun void trivfs_modify_stat (@w{struct trivfs_protid *@var{cred}}, @w{struct stat *@var{stbuf}})
This should modify a @code{struct stat} (as returned from the underlying
node) for presentation to callers of @code{io_stat}.  It is permissible
for this function to do nothing, but it must still be defined.
@end deftypefun

@deftypefun error_t trivfs_goaway (@w{struct trivfs_control *@var{cntl}}, @w{int @var{flags}})
This function is called when someone wants the filesystem @var{cntl} to
go away.  @var{flags} are from the set @code{FSYS_GOAWAY_*} found in
@code{<hurd/hurd_types.h>}.
@end deftypefun


@node Trivfs Options
@subsection Trivfs Options

The functions and variables described in this subsection already have
default definitions in @code{libtrivfs}, so you are not forced to define
them; rather, they may be redefined on a case-by-case basis.

@deftypevar {extern struct port_class *} trivfs_protid_portclasses []
@deftypevarx {extern int} trivfs_protid_nportclasses
@deftypevarx {extern struct port_class *} trivfs_cntl_portclasses []
@deftypevarx {extern int} trivfs_cntl_nportclasses
If you define these, they should be vectors (and the associated sizes)
of port classes that will be translated into control and protid pointers
for passing to RPCs, in addition to those passed to or created by
@code{trivfs_create_control} (or @code{trivfs_startup}), which will
automatically be recognized.
@end deftypevar

@deftypefn {Variable} error_t {(*trivfs_check_open_hook)} (@w{struct trivfs_control *@var{cntl}}, @w{struct iouser *@var{user}}, @w{int @var{flags}})
If this variable is non-zero, it will be called every time an open happens.
@var{user} and @var{flags} are from the open; @var{cntl} identifies the
node being opened.  This call need not check permissions on the
underlying node.  This call can block as necessary, unless
@code{O_NONBLOCK} is set in @var{flags}.  Any desired error can be
returned, which will be reflected to the user and will prevent the open from
succeeding.
@end deftypefn

@deftypefn {Variable} error_t (*trivfs_protid_create_hook) (@w{struct trivfs_protid *@var{prot}})
@deftypefnx {Variable} error_t (*trivfs_peropen_create_hook) (@w{struct trivfs_peropen *@var{perop}})
If these variables are non-zero, they will be called every time a new protid or
peropen structure is created and initialized.
@end deftypefn

@deftypefn {Variable} void (*trivfs_protid_destroy_hook) (@w{struct trivfs_protid *@var{prot}})
@deftypefnx {Variable} void (*trivfs_peropen_destroy_hook) (@w{struct trivfs_peropen *@var{perop}})
If these variables is non-zero, they will be called every time a protid or
peropen structure is about to be destroyed.
@end deftypefn

@deftypefn {Variable} error_t (*trivfs_getroot_hook) (@w{struct trivfs_control *@var{cntl}}, @w{mach_port_t @var{reply_port}}, @w{mach_msg_type_name_t @var{reply_port_type}}, @w{mach_port_t @var{dotdot}}, @w{uid_t *@var{uids}}, @w{u_int @var{nuids}}, @w{uid_t *@var{gids}}, @w{u_int @var{ngids}}, @w{int @var{flags}}, @w{retry_type *@var{do_retry}}, @w{char *@var{retry_name}}, @w{mach_port_t *@var{node}}, @w{mach_msg_type_name_t *@var{node_type}})
If this variable is set, it will be called by @code{trivfs_S_fsys_getroot}
before any other processing takes place.  If the return value is
@code{EAGAIN}, normal trivfs getroot processing continues, otherwise the
RPC returns with that return value.
@end deftypefn


@node Trivfs Ports
@subsection Trivfs Ports

If you choose to allocate your own trivfs port classes and buckets, the
following functions may come in handy:

@deftypefun error_t trivfs_add_port_bucket (@w{struct port_bucket **@var{bucket}})
Add the port bucket @code{*@var{bucket}} to the list of dynamically-
allocated port buckets; if @code{*@var{bucket}} is zero, an attempt is
made to allocate a new port bucket, which is then stored in
@code{*@var{bucket}}.
@c FIXME: what if the allocation attempt fails?
@c  tb: then an appropriate error (ENOMEM in this case) is returned.
@c  tb: Users are not supposed to assume they know all the possible error
@c  tb: returns.  All functions that return error_t are like this.
@end deftypefun

@deftypefun void trivfs_remove_port_bucket (@w{struct port_bucket *@var{bucket}})
Remove the previously added dynamic port bucket @var{bucket}, freeing it
if it was allocated by @code{trivfs_add_port_bucket}.
@end deftypefun

@deftypefun error_t trivfs_add_control_port_class (@w{struct port_class **@var{class}})
@deftypefunx error_t trivfs_add_protid_port_class (@w{struct port_class **@var{class}})
Add the port class @code{*@var{class}} to the list of control or protid port
classes recognized by trivfs; if @code{*@var{class}} is zero, an attempt is
made to allocate a new port class, which is stored in @code{*@var{class}}.
@end deftypefun

@deftypefun void trivfs_remove_control_port_class (@w{struct port_class *@var{class}})
@deftypefunx void trivfs_remove_protid_port_class (@w{struct port_class *@var{class}})
Remove the previously added dynamic control or protid port class
@var{class}, freeing it if it was allocated by
@code{trivfs_add_control_port_class} or
@code{trivfs_add_protid_port_class}.
@end deftypefun

Even if you do not use the above allocation functions, you may still be
able to use the default trivfs cleanroutines:

@deftypefun void trivfs_clean_cntl (@w{void *@var{port}})
@deftypefunx void trivfs_clean_protid (@w{void *@var{port}})
These functions should be installed as @code{libports} cleanroutines for
control port classes and protid port classes, respectively.
@end deftypefun


@node Fshelp Library
@section Fshelp Library
@scindex libfshelp
@scindex fshelp.h

The fshelp library implements various things that are useful to most
implementors of the file protocol.  It presumes that you are using the
iohelp library as well.  @code{libfshelp} is divided into separate
facilities which may be used independently.  These functions are
declared in @code{<hurd/fshelp.h>}.
@c FIXME: perhaps `useful to most implementors' should read `generic
@c to most implementations'

@menu
* Passive Translator Linkage::  Invoking passive translators.
* Active Translator Linkage::   Managing active translators.
* Fshelp Locking::              Implementing file locking.
* Fshelp Permissions::          Standard file access permission policies.
* Fshelp Misc::                 Useful standalone routines.
@end menu

@node Passive Translator Linkage
@subsection Passive Translator Linkage

These routines are self-contained and start passive translators,
returning the control port.  They do not require multithreading or the
ports library.

@deftypefn {Typedef} typedef error_t (*fshelp_open_fn_t) (@w{int @var{flags}}, @w{file_t *@var{node}}, @w{mach_msg_type_name_t *@var{node_type}})
A callback used by the translator starting functions.
Given some open flags, opens the appropriate file, and
returns the node port.
@end deftypefn

@deftypefun error_t fshelp_start_translator_long (@w{fshelp_open_fn_t @var{underlying_open_fn}}, @w{char *@var{name}}, @w{char *@var{argz}}, @w{int @var{argz_len}}, @w{mach_port_t *@var{fds}}, @w{mach_msg_type_name_t @var{fds_type}}, @w{int @var{fds_len}}, @w{mach_port_t *@var{ports}}, @w{mach_msg_type_name_t @var{ports_type}}, @w{int @var{ports_len}}, @w{int *@var{ints}}, @w{int @var{ints_len}}, @w{int @var{timeout}}, @w{fsys_t *@var{control}})
Start a passive translator @var{name} with arguments @var{argz} (length
@var{argz_len}).  Initialize the initports to @var{ports} (length
@var{ports_len}), the initints to @var{ints} (length @var{ints_len}),
and the file descriptor table to @var{fds} (length @var{fds_len}).
Return the control port in @code{*@var{control}}.  If the translator doesn't
respond or die in @var{timeout} milliseconds (if @var{timeout} is
greater than zero), return an appropriate error.  If the translator dies
before responding, return @code{EDIED}.
@end deftypefun

@deftypefun error_t fshelp_start_translator (@w{fshelp_open_fn_t @var{underlying_open_fn}}, @w{char *@var{name}}, @w{char *@var{argz}}, @w{int @var{argz_len}}, @w{int @var{timeout}}, @w{fsys_t *@var{control}})
Same as @code{fshelp_start_translator_long}, except the initports and
ints are copied from our own state, @var{fd}[2] is copied from our own
stderr, and the other fds are cleared.  For full-service filesystems, it
is almost always wrong to use @code{fshelp_start_translator}, because
the current working directory of the translator will not then be as
normally expected.  (Current working directories of passive translators
should be the directory they were found in.)  In fact, full-service
filesystems should usually start passive translators as a side-effect of
calling @code{fshelp_fetch_root} (@pxref{Active Translator Linkage}).
@end deftypefun

@node Active Translator Linkage
@subsection Active Translator Linkage

These routines implement the linkage to active translators needed
by any filesystem which supports them.  They require the threads
library and use the passive translator routines above, but they don't
require the ports library at all.

This interface is complex, because creating the ports and state
necessary for @code{start_translator_long} is expensive.  The caller to
@code{fshelp_fetch_root} should not need to create them on every call,
since usually there will be an existing active translator.

@deftypefun void fshelp_transbox_init (@w{struct transbox *@var{transbox}}, @w{struct mutex *@var{lock}}, @w{void *@var{cookie}})
Initialize a transbox, which contains state information for active
translators.
@end deftypefun

@deftypefn {Typedef} typedef error_t (*fshelp_fetch_root_callback1_t) (@w{void *@var{cookie1}}, @w{void *@var{cookie2}}, @w{uid_t *@var{uid}}, @w{gid_t *@var{gid}}, @w{char **@var{argz}}, @w{size_t *@var{argz_len}})
This routine is called by @code{fshelp_fetch_root} to fetch more
information.  Return the owner and group of the underlying translated
file in @code{*@var{uid}} and @code{*@var{gid}}; point
@code{*@var{argz}} at the entire passive translator specification for
the file (setting @code{*@var{argz_len}} to the length).  If there is no
passive translator, then return @code{ENOENT}.  @var{cookie1} is the
cookie passed in @code{fshelp_transbox_init}.  @var{cookie2} is the
cookie passed in the call to @code{fshelp_fetch_root}.
@end deftypefn

@deftypefn {Typedef} typedef error_t (*fshelp_fetch_root_callback2_t) (@w{void *@var{cookie1}}, @w{void *@var{cookie2}}, @w{int @var{flags}}, @w{mach_port_t *@var{underlying}}, @w{mach_msg_type_name_t *@var{underlying_type}})
This routine is called by @code{fshelp_fetch_root} to fetch more
information.  Return an unauthenticated node for the file itself in
@code{*@var{underlying}} and @code{*@var{underlying_type}} (opened with
@var{flags}).  @var{cookie1} is the cookie passed in
@code{fshelp_transbox_init}.  @var{cookie2} is the cookie passed in the
call to @code{fshelp_fetch_root}.
@end deftypefn

@deftypefun error_t fshelp_fetch_root (@w{struct transbox *@var{transbox}}, @w{void *@var{cookie}}, @w{file_t @var{dotdot}}, @w{struct iouser *@var{user}}, @w{int @var{flags}}, @w{fshelp_fetch_root_callback1_t @var{callback1}}, @w{fshelp_fetch_root_callback2_t @var{callback2}}, @w{retry_type *@var{retry}}, @w{char *@var{retryname}}, @w{mach_port_t *@var{root}})
Fetch the root from @var{transbox}.  @var{dotdot} is an unauthenticated
port for the directory in which we are looking; @var{user} specifies the
ids of the user responsible for the call.  @var{flags} are as for
@code{dir_lookup} (but @code{O_CREAT} and @code{O_EXCL} are not
meaningful and are ignored).  The transbox lock (as set by
@code{fshelp_transbox_init}) must be held before the call, and will be
held upon return, but may be released during the operation of the call.
@end deftypefun

@deftypefun int fshelp_translated (@w{struct transbox *@var{box}})
Return true if and only if there is an active translator on this box.
@end deftypefun

@deftypefun error_t fshelp_set_active (@w{struct transbox *@var{box}}, @w{fsys_t @var{newactive}}, @w{int @var{excl}})
Atomically replace the existing active translator port for this box with
@var{newactive}.  If @var{excl} is non-zero then don't modify an
existing active transbox; return @code{EBUSY} instead.
@end deftypefun

@deftypefun error_t fshelp_fetch_control (@w{struct transbox *@var{box}}, @w{mach_port_t *@var{control}})
Fetch the control port to make a request on it.  It's a bad idea to use
@code{fsys_getroot} with the result; use @code{fshelp_fetch_root}
instead.
@end deftypefun

@deftypefun void fshelp_drop_transbox (@w{struct transbox *@var{box}})
Clean transbox state so that deallocation or reuse is possible.
@end deftypefun


@node Fshelp Locking
@subsection Fshelp Locking

The @code{flock} call is in flux, as the current Hurd interface (as of
version @value{VERSION}) is not suitable for implementing the POSIX
record-locking semantics.


@node Fshelp Permissions
@subsection Fshelp Permissions

These functions are designed to aid with user permission checking.  It
is a good idea to use these routines rather than to roll your own, so
that Hurd users see consistent handling of file and directory permission
bits.

@deftypefun error_t fshelp_isowner (@w{struct stat *@var{st}}, @w{struct iouser *@var{user}})
Check to see whether @var{user} should be considered the owner of the
file identified by @var{st}.  If so, return zero; otherwise return an
appropriate error code.
@end deftypefun

@deftypefun error_t fshelp_access (@w{struct stat *@var{st}}, @w{int @var{op}}, @w{struct iouser *@var{user}})
Check to see whether the user @var{user} can operate on the file
identified by @var{st}.  @var{op} is one of @code{S_IREAD},
@code{S_IWRITE}, and @code{S_IEXEC}.  If the access is permitted, return
zero; otherwise return an appropriate error code.
@end deftypefun

@deftypefun error_t fshelp_checkdirmod (@w{struct stat *@var{dir}}, @w{struct stat *@var{st}}, @w{struct iouser *@var{user}})
Check to see whether @var{user} is allowed to modify @var{dir} with respect to
existing file @var{st}.  If there is no existing file, then @var{st}
should be set to zero.  If the access is permissible, return zero;
otherwise return an appropriate error code.
@c FIXME: what does it mean to modify a directory with respect to an
@c existing file?
@c  tb: If you delete a file, say, then you are modifying the directory
@c  tb: (not the file) but with respect to that file.  This is relevant
@c  tb: in implementing the directory sticky-bit permissions algorithm.
@end deftypefun

@node Fshelp Misc
@subsection Fshelp Misc

The following functions are completely standalone:

@deftypefun error_t fshelp_delegate_translation (@w{char *@var{server_name}}, @w{mach_port_t @var{requestor}}, @w{char **@var{argv}})
Try to hand off responsibility from a translator to the server located
on the node @var{server_name}.  @var{requestor} is the translator's
bootstrap port, and @var{argv} is the command line.  If
@var{server_name} is null, then a name is concocted by prepending
@code{_servers} to @code{argv[0]} .
@end deftypefun

@deftypefun error_t fshelp_exec_reauth (@w{int @var{suid}}, @w{uid_t @var{uid}}, @w{int @var{sgid}}, @w{gid_t @var{gid}}, @w{auth_t @var{auth}}, error_t (*@var{get_file_ids}) (@w{struct idvec *@var{uids}}, @w{struct idvec *@var{gids}}), @w{mach_port_t *@var{ports}}, @w{mach_msg_type_number_t @var{num_ports}}, @w{mach_port_t *@var{fds}}, @w{mach_msg_type_number_t @var{num_fds}}, @w{int *@var{secure}})
If @var{suid} or @var{sgid} is true, adds @var{uid} and/or @var{gid}
respectively to the authentication in
@code{@var{ports}[INIT_PORT_AUTH]}, and replaces it with the result.
All the other ports in @var{ports} and @var{fds} are then
reauthenticated, using any privileges available through @var{auth}.  If
the auth port in @code{@var{ports}[INIT_PORT_AUTH]} is bogus, and
@var{get_file_ids} is non-null, it is called to get a list
of uids and gids from the file to use as a replacement.  If @var{secure}
is non-null and any added ids are new, then the variable it points to is
set to nonzero, otherwise zero.  If either the uid or gid case fails,
then the other may still apply.
@end deftypefun

@deftypefun error_t fshelp_get_identity (@w{struct port_bucket *@var{bucket}}, @w{ino_t @var{fileno}}, @w{mach_port_t *@var{pt}})
Return an identity port in @code{*@var{pt}} for the node numbered
@var{fileno}, suitable for returning from @code{io_identity}; exactly
one send right must be created from the returned value.  @var{fileno}
should be the same value returned as the @var{fileno} out-parameter in
@code{io_identity}, and in the enclosing directory (except for mount
points), and in the @code{st_ino} stat field.  @var{bucket} should be a
@code{libports} port bucket; fshelp requires the caller to make sure
port operations (for no-senders notifications) are used.
@end deftypefun

@deftypefun error_t fshelp_return_malloced_buffer (@w{char *@var{buf}}, @w{size_t @var{len}}, @w{char **@var{rbuf}}, @w{mach_msg_type_number_t *@var{rlen}})
Put data from the malloced buffer @var{buf}, @var{len} bytes long, into
@var{rbuf} (which is @var{rlen} bytes long), suitable for returning from
an RPC.  If @var{len} is greater than zero, @var{buf} is freed,
regardless of whether an error is returned or not.
@end deftypefun

@deftypefun error_t fshelp_set_options (@w{struct argp *@var{argp}}, @w{int @var{flags}}, @w{char *@var{argz}}, @w{size_t @var{argz_len}}, @w{void *@var{input}})
Invoke @code{argp_parse} in the standard way, with data from @var{argz}
and @var{argz_len}.
@end deftypefun

@deftypefun void fshelp_touch (@w{struct stat *@var{st}}, @w{unsigned @var{what}}, @w{volatile struct mapped_time_value *@var{maptime}})
Change the stat times of @var{node} as indicated by @var{what} to
the current time.  @var{what} is a bitmask of one or more of
the @code{TOUCH_ATIME}, @code{TOUCH_MTIME}, and @code{TOUCH_CTIME}
constants.
@end deftypefun


@node File Interface
@section File Interface
@scindex fs.defs

This section documents the interface for operating on files.

@menu
* File Overview::               Basic concepts for the file interface.
* Changing Status::             Changing the owner (etc.) of a file.
* Program Execution::           Executing files.
* File Locking::                Implementing the @code{flock} call.
* File Frobbing::               Other active calls on files.
* Opening Files::               Looking up files in directories.
* Modifying Directories::       Creating and deleting nodes.
* Notifications::               File and directory change callbacks.
* File Translators::            How to set and get translators.
@end menu

@node File Overview
@subsection File Overview

The file interface is a superset of the I/O interface (@pxref{I/O
Interface}).  Servers which provide the file interface are required to
support the I/O interface as well.  All objects reachable in the
filesystem are expected to provide the file interface, even if they do
not contain data.  (The @code{trivfs} library makes it easy to do so for
ordinary sorts of cases.  @xref{Trivfs Library}.)

The interface definitions for the file interface are found in
@code{<hurd/fs.defs>}.

Files have various pieces of status information which are returned by
@code{io_stat} (@pxref{Information Queries}).  Most of this status
information can be directly changed by various calls in the file
interface; some of it should vary implicitly as the contents of the file
change.

Many of these calls have general rules associated with them describing
how security and privilege should operate.  The @code{diskfs} library
(@pxref{Diskfs Library}) implements these rules for stored filesystems.
These rules have also been implemented in the fshelp library
(@pxref{Fshelp Library}).  Trivfs-based servers generally have no need
to implement these rules at all.

In special cases, there may be a reason to implement a different
security check from that specified here, or to implement a call to do
something slightly different.  But such cases must be carefully
considered; make sure that you will not confuse innocent user programs
through excessive cleverness.

If some operation cannot be implemented (for example, @code{chauthor}
over FTP), then the call should return @code{EOPNOTSUPP}.  If it is
merely difficult to implement a call, it is much better to figure out a
way to implement it as a series of operations rather than to return
errors to the user.

@node Changing Status
@subsection Changing Status

There are several RPCs available for users to change much of the status
information associated with a file.  (The information is returned by the
@code{io_stat} RPC; see @ref{Information Queries}.)

All these operations are restricted to root and the owner of the file.
When attempted by another user, they should return @code{EPERM}.

@findex file_chown
The @code{file_chown} RPC changes the owner and group of the file.  Only
root should be able to change the owner, and changing the group to a
group the caller is not in should also be prohibited.  Violating either
of these conditions should return @code{EPERM}.

@findex file_chauthor
The @code{file_chauthor} RPC changes the author of the file.  It should
be legitimate to change the author to any value without restriction.

@findex file_chmod
The @code{file_chmod} RPC changes the file permission mode bits.

@findex file_chflags
The @code{file_chflags} RPC changes the flags of the file.  It should be
legitimate to change the flags to any value without restriction.  No
standard meanings have been assigned to the flags yet, but we intend to
do so.  Do not assume that the flags format we choose will map
identically to that of some existing filesystem format.

@findex file_utimes
The @code{file_utimes} RPC changes the @var{atime} and @var{mtime} of
the file.  Making this call must cause the @var{ctime} to be updated as
well, even if no actual change to either the @var{mtime} or the
@var{atime} occurs.

@findex file_set_size
The @code{file_set_size} RPC is special; not only does it change the
status word specifying the size of the file, but it also changes the
actual contents of the file.  If the file size is being reduced it
should release secondary storage associated with the previous contents
of the file.  If the file is being extended, the new region added to the
file must be zero-filled.  Unlike the other RPCs in this section,
@code{file_set_size} should be permitted to any user who is allowed to
write the file.


@node Program Execution
@subsection Program Execution

@findex file_exec
Execution of programs on the Hurd is done through fileservers with the
@code{file_exec} RPC.  The fileserver is expected to verify that the
user is allowed to execute the file, make whatever modifications to the
ports are necessary for setuid execution, and then invoke the standard
execserver found on @file{/servers/exec}.

This section specifically addresses what fileservers are expected to do,
with minimal attention to the other parts of the process.  @xref{Running
Programs}, for more general information.

The file must be opened for execution; if it is not, @code{EBADF} should
be returned.  In addition, at least one of the execute bits must be on.  A
failure of this check should result in @code{EACCES}---not
@code{ENOEXEC}.  It is not proper for the fileserver ever to respond to
the @code{file_exec} RPC with @code{ENOEXEC}.

If either the setuid or setgid bits are set, the server needs to
construct a new authentication handle with the additional new ID's.
Then all the ports passed to @code{file_exec} need to be reauthenticated
with the new handle.  If the fileserver is unable to make the new
authentication handle (for example, because it is not running as root)
it is not acceptable to return an error; in such a case the server
should simply silently fail to implement the setuid/setgid semantics.

If the setuid/setgid transformation adds a new uid or gid to the user's
authentication handle that was not previously present (as opposed to
merely reordering them), then the @code{EXEC_SECURE} and
@code{EXEC_NEWTASK} flags should both be added in the call to
@code{exec_exec}.

The server then needs to open a new port onto the executed file which
will not share any file pointers with the port the user passed in,
opened with @code{O_READ}.  Finally, all the information (mutated
appropriately for setuid/setgid) should be sent to the execserver with
@code{exec_exec}.  Whatever error code @code{exec_exec} returns should
returned to the caller of @code{file_exec}.

@node File Locking
@subsection File Locking

The @code{flock} call is in flux, as the current Hurd interface (as of
version @value{VERSION}) is not suitable for implementing the POSIX
record-locking semantics.

@findex file_lock
@findex file_lock_stat
You should ignore the @code{file_lock} and @code{file_lock_stat} calls
until the new record-locking interface is implemented.


@node File Frobbing
@subsection File Frobbing

FIXME: Other active calls on files

@code{file_sync}

@code{file_getfh}

@code{file_getlinknode}

@code{file_check_access}

These manipulate meta-information:

@code{file_reparent}

@code{file_statfs}

@code{file_syncfs}

@code{file_getcontrol}

@code{file_get_storage_info}

@code{file_get_fs_options}


@node Opening Files
@subsection Opening Files

FIXME: Looking up files in directories

@code{dir_lookup}

@code{dir_readdir}

@node Modifying Directories
@subsection Modifying Directories

@deftypefun kern_return_t dir_mkfile (@w{file_t @var{directory}}, @w{int @var{flags}}, @w{mode_t @var{mode}}, @w{mach_port_t *@var{newnode}})
Create a new file in @var{directory} without linking it into the
filesystem.  You still must have write permission on the specified
directory, even though it will not actually be written.

The function returns a port to the new file in *@var{newnode}.  Flags
are the same as for @code{dir_lookup}, but @code{O_CREAT} and
@code{O_TRUNC} are assumed even if not specified.
@end deftypefun

@deftypefun kern_return_t dir_mkdir (@w{file_t @var{directory}}, @w{char *@var{name}}, @w{mode_t @var{mode}})
Create a new directory named @var{name} in @var{directory} with
permission specified by @var{mode}.
@end deftypefun

@deftypefun kern_return_t dir_rmdir (@w{file_t @var{directory}}, @w{char *@var{name}})
Remove the directory named @var{name} from @var{directory}.
@end deftypefun

@deftypefun kern_return_t dir_unlink (@w{file_t @var{directory}}, @w{char *@var{name}})
Remove the non-directory node @var{name} from @var{directory}.
@end deftypefun

@deftypefun kern_return_t dir_link (@w{file_t @var{directory}}, @w{file_t @var{file}}, @w{char *@var{name}}, @w{int @var{excl}})
Create a hard link in @var{directory}.  If @var{excl} is set and
@var{name} already exists in @var{directory}, then this function will
fail.  If @var{excl} is not set and @var{name} already exists the old
file named @var{name} will be unlinked.  If @var{directory} and
@var{file} are not on the same filesystem, then @code{dir_link} might
fail with @code{EXDEV}.
@end deftypefun

@deftypefun kern_return_t dir_rename (@w{file_t @var{olddirectory}}, @w{char *@var{oldname}}, @w{file_t @var{newdirectory}}, @w{char *@var{newname}}, @w{int @var{excl}})
Move the node @var{oldname} in @var{olddirectory} to the node
@var{newname} in @var{newdirectory}.  If @var{excl} is set and
@var{newname} already exists in @var{newdirectory}, then this function
will fail.  If @var{excl} is not set and @var{newname} already exists,
the old file named @var{newname} will be unlinked.  If
@var{olddirectory} and @var{newdirectory} are not on the same
filesystem, then @code{dir_rename} might fail with @code{EXDEV}.
@end deftypefun

@node Notifications
@subsection Notifications

FIXME: File and directory change callbacks

File change notifications are not yet implemented, but directory
notifications are.

@code{file_notice_changes}

@code{dir_notice_changes}

@node File Translators
@subsection File Translators

FIXME: How to set and get translators

@code{file_set_translator}

@code{file_get_translator}

@code{file_get_translator_cntl}


@node Filesystem Interface
@section Filesystem Interface
@scindex fsys.defs

The filesystem interface (described in @code{<hurd/fsys.defs>}) is
supported by translator control ports.

FIXME: finish


@node Special Files
@chapter Special Files

In Unix, any file that does not act as a general-purpose unit of storage
is called a @dfn{special file}.  These are FIFOs, Unix-domain sockets,
and device nodes.  In the Hurd, there is no need for the ``special
file'' distinction, since they are implemented by translators, just as
regular files are.

Nevertheless, the Hurd maintains this distinction, in order to provide
backward compatibility for Unix programs (which do not know about
translators).  Studying the implementation of Hurd special files is a
good way to introduce the idea of translators to people who are familiar
with Unix.

This chapter does not discuss @file{/dev/zero} or any of the
microkernel-based devices, since these are translated by the generalized
storeio server (FIXME xref).

FIXME: finish

@section fifo
@section ifsock
@section magic
@section null


FIXME: a chapter on libtreefs and libdirmgt will probably go here


@node Stores
@chapter Stores

A @dfn{store} is a fixed-size block of storage, which can be read and
perhaps written to.  A store is more general than a file: it refers to
any type of storage such as devices, files, memory, tasks, etc.  Stores
can also be representations of other stores, which may be combined and
filtered in various ways.

@menu
* Store Library::               An abstract interface to storage systems.
@end menu

@section storeinfo, storecat, storeread
@section storeio

FIXME: finish

@node Store Library
@section Store Library
@scindex libstore
@scindex store.h

The store library (which is declared in @code{<hurd/store.h>})
implements many different backends which support the store abstraction.
Hurd programs use @code{libstore} so that new storage types can be
implemented with minimum impact.

@menu
* Store Arguments::             Parsing store command-line arguments.
* Store Management::            Creating and manipulating stores.
* Store I/O::                   Reading and writing data to stores.
* Store Classes::               Ready-to-use storage backends.
* Store RPC Encoding::          Transferring store descriptors via RPC.
@end menu


@node Store Arguments
@subsection Store Arguments

FIXME: describe startup sequence

@deftypevr {Structure} struct store_parsed
The result of parsing a store, which should be enough information to
open it, or return the arguments.
@end deftypevr

@deftypefn {Structure} struct store_argp_params @{ @w{struct store_parsed *@var{result}}; @w{const char *@var{default_type}}; @w{const struct store_class *const *@var{classes}}; @}
This is the structure used to pass args back and forth from
@var{store_argp}.  @var{result} is the resulting parsed result.  If
@samp{--store-type} isn't specified, then @var{default_type} should be
used as the store type; zero is equivalent to @code{"query"}.
@var{classes} is set of classes used to validate store types and
argument syntax.
@end deftypefn

@deftypevar {extern struct argp} store_argp
This is an argument parser that may be used for parsing a simple command
line specification for stores.  The accompanying input parameter must be
a pointer to a @code{struct store_argp_params}.
@end deftypevar

@deftypefun void store_parsed_free (@w{struct store_parsed *@var{parsed}})
Free all resources used by @var{parsed}.
@end deftypefun

@deftypefun error_t store_parsed_open (@w{const struct store_parsed *@var{parsed}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Open the store specified by @var{parsed}, and return it in @var{store}.
@end deftypefun

@deftypefun error_t store_parsed_append_args (@w{const struct store_parsed *@var{parsed}}, @w{char **@var{argz}}, @w{size_t *@var{argz_len}})
Add the arguments used to create @var{parsed} to @var{argz} and
@var{argz_len}.
@end deftypefun

@deftypefun error_t store_parsed_name (@w{const struct store_parsed *@var{parsed}}, @w{char **@var{name}})
Make an option string describing @var{parsed}, and return it in malloced
storage in @var{name}.
@end deftypefun


@node Store Management
@subsection Store Management

The following functions provide basic management of stores:

@deftypefun error_t store_create (@w{file_t @var{source}}, @w{int @var{flags}}, @w{const struct store_class *const *@var{classes}}, @w{struct store **@var{store}})
Return a new store in @var{store}, which refers to the storage
underlying @var{source}.  @var{classes} is used to select classes
specified by the provider; if zero, @var{store_std_classes} is used.
@var{flags} is set with @code{store_set_flags}, with the exception of
@code{STORE_INACTIVE}, which merely indicates that no attempt should be
made to activate an inactive store; if @code{STORE_INACTIVE} is not
specified, and the store returned for SOURCE is inactive, an attempt is
made to activate it (failure of which causes an error to be returned).
A reference to @var{source} is created (but may be destroyed with
@code{store_close_source}).

It is usually better to use a specific store open or create function
such as @code{store_open} (@pxref{Store Classes}), since they are
tailored to the needs of a specific store.  Generally, you should only
use @code{store_create} if you are defining your own store class, or you
need options that are not provided by a more specific store creation
function.
@end deftypefun

@deftypefun void store_close_source (@w{struct store *@var{store}})
If @var{store} was created using @code{store_create}, remove the
reference to the source from which it was created.
@end deftypefun

@deftypefun void store_free (@w{struct store *@var{store}})
Clean up and deallocate @var{store}'s underlying stores.
@end deftypefun

@deftypefn {Structure} struct store_run @{ @w{store_offset_t @var{start}}, @var{length}; @}
A @code{struct store_run} represents a contiguous region in a store's
address range.  These are used to designate active portions of a store.
If @var{start} is -1, then the region is a @dfn{hole} (it is zero-filled
and doesn't correspond to any real addresses).
@end deftypefn

@deftypefun error_t store_set_runs (@w{struct store *@var{store}}, @w{const struct store_run *@var{runs}}, @w{size_t @var{num_runs}})
Set @var{store}'s current runs list to (a copy of) @var{runs} and
@var{num_runs}.
@end deftypefun

@deftypefun error_t store_set_children (@w{struct store *@var{store}}, @w{struct store *const *@var{children}}, @w{size_t @var{num_children}})
Set @var{store}'s current children to (a copy of) @var{children} and
@var{num_children} (note that just the vector @var{children} is copied,
not the actual children).
@end deftypefun

@deftypefun error_t store_children_name (@w{const struct store *@var{store}}, @w{char **@var{name}})
Try to come up with a name for the children in @var{store}, combining
the names of each child in a way that could be used to parse them with
@code{store_open_children}.  This is done heuristically, and so may not
succeed.  If a child doesn't have a name, @code{EINVAL} is returned.
@end deftypefun

@deftypefun error_t store_set_name (@w{struct store *@var{store}}, @w{const char *@var{name}})
Sets the name associated with @var{store} to a copy of @var{name}.
@end deftypefun

@deftypefun error_t store_set_flags (@w{struct store *@var{store}}, @w{int @var{flags}})
Add @var{flags} to @var{store}'s currently set flags.
@end deftypefun

@deftypefun error_t store_clear_flags (@w{struct store *@var{store}}, @w{int @var{flags}})
Remove @var{flags} from @var{store}'s currently set flags.
@end deftypefun

@deftypefun error_t store_set_child_flags (@w{struct store *@var{store}}, @w{int @var{flags}})
Set @var{flags} in all children of @var{store}, and if successful, add
@var{flags} to @var{store}'s flags.
@end deftypefun

@deftypefun error_t store_clear_child_flags (@w{struct store *@var{store}}, @w{int @var{flags}})
Clear @var{flags} in all children of @var{store}, and if successful,
remove @var{flags} from @var{store}'s flags.
@end deftypefun

@deftypefun int store_is_securely_returnable (@w{struct store *@var{store}}, @w{int @var{open_flags}})
Returns true if @var{store} can safely be returned to a user who has
accessed it via a node using @var{open_flags}, without compromising
security.
@end deftypefun

@deftypefun error_t store_clone (@w{struct store *@var{from}}, @w{struct store **@var{to}})
Return a copy of @var{from} in @var{to}.
@end deftypefun

@deftypefun error_t store_remap (@w{struct store *@var{source}}, @w{const struct store_run *@var{runs}}, @w{size_t @var{num_runs}}, @w{struct store **@var{store}})
Return a store in @var{store} that reflects the blocks in @var{runs} and
@var{runs_len} from source; @var{source} is consumed, but not
@var{runs}.  Unlike the @code{store_remap_create} function, this may
simply modify @var{source} and return it.
@end deftypefun

@c FIXME: what does `is consumed' mean?
@c  tb: gone; you can't use it any more.  libstore has taken it over.

@node Store I/O
@subsection Store I/O

The following functions allow you to read and modify the contents of a
store:

@deftypefun error_t store_map (@w{const struct store *@var{store}}, @w{vm_prot_t @var{prot}}, @w{mach_port_t *@var{memobj}})
Return a memory object paging on @var{store}.
@ignore @c FIXME: update if/when there are more pager-related functions
If this call fails with @code{EOPNOTSUPP}, you can try calling some of
the routines below to get a pager.
@end ignore
@end deftypefun

@deftypefun error_t store_read (@w{struct store *@var{store}}, @w{store_offset_t @var{addr}}, @w{size_t @var{amount}}, @w{void **@var{buf}}, @w{size_t *@var{len}})
Read @var{amount} bytes from @var{store} at @var{addr} into @var{buf}
and @var{len} (which follows the usual Mach buffer-return semantics) to
@var{store} at @var{addr}.  @var{addr} is in @var{blocks} (as defined by
@code{@var{store}->block_size}).  Note that @var{len} is in bytes.
@end deftypefun

@c FIXME: should be say `Mach' above, or should we say
@c `microkernel'?
@c  tb: nope, Mach-specific semantics.

@deftypefun error_t store_write (@w{struct store *@var{store}}, @w{store_offset_t @var{addr}}, @w{void *@var{buf}}, @w{size_t @var{len}}, @w{size_t *@var{amount}})
Write @var{len} bytes from @var{buf} to @var{store} at @var{addr}.
Returns the amount written in @var{amount} (in bytes).  @var{addr} is in
@var{blocks} (as defined by @code{@var{store}->block_size}).
@end deftypefun

@deftypefun error_t store_set_size (@w{struct store *@var{store}}, @w{store_offset_t @var{newsize}})
Set @var{store}'s size to @var{newsize} (in bytes).
@end deftypefun

@node Store Classes
@subsection Store Classes

The store library comes with a number of standard store class
implementations:

@deftypevar {extern const struct store_class *const} store_std_classes []
This is a null-terminated vector of the standard store classes
implemented by @code{libstore}.
@end deftypevar

If you are building your own class vectors, the following function may
be useful:

@deftypevar error_t store_concat_class_vectors (@w{struct store_class **@var{cv1}}, @w{struct store_class **@var{cv2}}, @w{struct store_class ***@var{concat}})
Concatenate the store class vectors in @var{cv1} and @var{cv2}, and
return a new (malloced) vector in @var{concat}.
@end deftypevar

@subsubsection @code{query} store
@cindex @code{query} store

@deftypevar {extern const struct store_class} store_query_class
This store is a virtual store which queries a filesystem node, and
delegates control to an appropriate store class.
@end deftypevar

@deftypefun error_t store_open (@w{const char *@var{name}}, @w{int @var{flags}}, @w{const struct store_class *const *@var{classes}}, @w{struct store **@var{store}})
Open the file @var{name}, and return a new store in @var{store}, which
refers to the storage underlying it.  @var{classes} is used to select
classes specified by the provider; if it is zero, then
@var{store_std_classes} is used.  @var{flags} is set with
@code{store_set_flags}.  A reference to the open file is created (but
may be destroyed with @code{store_close_source}).
@end deftypefun

@subsubsection @code{typed_open} store
@cindex @code{typed_open} store

@deftypevar {extern const struct store_class} store_typed_open_class
This store is special in that it doesn't correspond to any specific
store functions, rather it provides a way to interpret character strings
as specifications for other stores.
@end deftypevar

@deftypefun error_t store_typed_open (@w{const char *@var{name}}, @w{int @var{flags}}, @w{const struct store_class *const *@var{classes}}, @w{struct store **@var{store}})
Open the store indicated by @var{name}, which should consist of a store
type name followed by a @samp{:} and any type-specific name, returning the
new store in @var{store}.  @var{classes} is used to select classes
specified by the type name; if it is zero, @var{store_std_classes} is
used.
@end deftypefun

@deftypefun error_t store_open_children (@w{const char *@var{name}}, @w{int @var{flags}}, @w{const struct store_class *const *@var{classes}}, @w{struct store ***@var{stores}}, @w{size_t *@var{num_stores}})
Parse multiple store names in @var{name}, and open each individually,
returning all in the vector @var{stores}, and the number in
@var{num_stores}.  The syntax of @var{name} is a single non-alphanumeric
separator character, followed by each child store name separated by the
same separator; each child name is @samp{@var{type}:@var{name}} notation
as parsed by @code{store_typed_open}.  If every child uses the same
@samp{@var{type}:} prefix, then it may be factored out and put before
the child list instead (the two notations are differentiated by whether
or not the first character of @var{name} is alphanumeric).
@end deftypefun

@subsubsection @code{device} store
@cindex @code{device} store

@cindex @code{device drivers}
@deftypevar {extern const struct store_class} store_device_class
This store is a simple wrapper for a microkernel device
driver.@footnote{It is important to note that device drivers are not
provided by the Hurd, but by the underlying microkernel.  Hurd `devices'
are just storeio-translated nodes which make the microkernel device
drivers obey Hurd semantics.  If you wish to implement a new device
driver, you will need to consult the appropriate microkernel
documentation.}
@end deftypevar

@deftypefun error_t store_device_open (@w{const char *@var{name}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Open the device named @var{name}, and return the corresponding store in
@var{store}.
@end deftypefun

@deftypefun error_t store_device_create (@w{device_t @var{device}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new store in @var{store} referring to the microkernel device
@var{device}.  Consumes the @var{device} send right.
@end deftypefun

@subsubsection @code{file} store
@cindex @code{file} store

@deftypevar {extern const struct store_class} store_file_class
This store reads and writes the contents of a Hurd file.
@end deftypevar

@deftypefun error_t store_file_open (@w{const char *@var{name}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Open the file @var{name}, and return the corresponding store in @var{store}.
@end deftypefun

@deftypefun error_t store_file_create (@w{file_t @var{file}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new store in @var{store} referring to the file @var{file}.
Unlike @code{store_create}, this will always use file I/O, even it would
be possible to be more direct.  This may work in more cases, for instance
if the file has holes.  Consumes the @var{file} send right.
@end deftypefun

@subsubsection @code{task} store
@cindex @code{task} store

@deftypevar {extern const struct store_class} store_task_class
This store provides access to the contents of a microkernel task.
@end deftypevar

@deftypevar error_t store_task_open (@w{const char *@var{name}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Open the task @var{name} (@var{name} should be the task's pid), and
return the corresponding store in @var{store}.
@end deftypevar

@deftypevar {error_t} store_task_create (@w{task_t @var{task}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new store in @var{store} referring to the task @var{task},
consuming the @var{task} send right.
@end deftypevar

@subsubsection @code{zero} store
@cindex @code{zero} store

@deftypevar {extern const struct store_class} store_zero_class
Reads to this store always return zero-filled buffers, no matter what
has been written into it.  This store corresponds to the Unix
@file{/dev/zero} device node.
@end deftypevar

@deftypefun error_t store_zero_create (@w{store_offset_t @var{size}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new zero store @var{size} bytes long in @var{store}.
@end deftypefun

@subsubsection @code{copy} store
@cindex @code{copy} store

@deftypevar {extern const struct store_class} store_copy_class
This store provides a temporary copy of another store.  This is useful
if you want to provide writable data, but do not wish to modify the
underlying store.  All changes to a copy store are lost when it is
closed.
@end deftypevar

@deftypefun error_t store_copy_open (@w{const char *@var{name}}, @w{int @var{flags}}, @w{const struct store_class *const *@var{classes}}, @w{struct store **@var{store}})
Open the copy store @var{name} (which consists of another store class
name, a @samp{:}, and a name for the store class to open) and return the
corresponding store in @var{store}.  @var{classes} is used to select
classes specified by the type name; if it is zero,
@var{store_std_classes} is used.
@end deftypefun

@deftypefun error_t store_copy_create (@w{struct store *@var{from}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new store in @var{store} which contains a snapshot of the
contents of the store @var{from}; @var{from} is consumed.
@end deftypefun

@deftypefun error_t store_buffer_create (@w{void *@var{buf}}, @w{size_t @var{buf_len}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new store in @var{store} which contains the memory buffer
@var{buf}, of length @var{buf_len}.  @var{buf} must be allocated with
@code{vm_allocate}, and will be consumed.
@end deftypefun

@subsubsection @code{gunzip} store
@cindex @code{gunzip} store

@deftypevar {extern const struct store_class} store_gunzip_class
This store provides transparent GNU zip decompression of a substore.
Unfortunately, this store is currently read-only.
@end deftypevar

@deftypevar error_t store_gunzip_open (@w{const char *@var{name}}, @w{int @var{flags}}, @w{const struct store_class *const *@var{classes}}, @w{struct store **@var{store}})
Open the gunzip store @var{name} (which consists of another store class
name, a @samp{:}, and a name for that store class to open), and return
the corresponding store in @var{store}.  @var{classes} is used to select
classes specified by the type name; if it is zero,
@var{store_std_classes} is used.
@end deftypevar

@deftypevar error_t store_gunzip_create (@w{struct store *@var{from}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new store in @var{store} which contains a snapshot of the
uncompressed contents of the store @var{from}; @var{from} is consumed.
@var{block_size} is the desired block size of the result.
@end deftypevar

@subsubsection @code{concat} store
@cindex @code{concat} store

@cindex linear concatenation
@cindex appending disks
@cindex disks, appending
@cindex disk concatenation
@cindex concatenation, disk
@deftypevar {extern const struct store_class} store_concat_class
This class provides a linear concatenation storage mode.  It creates a
new virtual store which consists of several different substores appended
to one another.

This mode is designed to increase storage capacity, so that when one
substore is filled, new data is transparently written to the next
substore.  Concatenation requires robust hardware, since a failure in
any single substore will wipe out a large section of the data.
@end deftypevar

@deftypefun error_t store_concat_open (@w{const char *@var{name}}, @w{int @var{flags}}, @w{const struct store_class *const *@var{classes}}, @w{struct store **@var{store}})
Return a new store that concatenates the stores created by opening all
the individual stores described in @var{name}; for the syntax of
@var{name}, see @code{store_open_children}.
@end deftypefun

@deftypefun error_t store_concat_create (@w{struct store * const *@var{stores}}, @w{size_t @var{num_stores}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new store in @var{store} that concatenates all the stores in
@var{stores} (@var{num_stores} of them).  The stores in @var{stores} are
consumed; that is, they will be freed when this store is freed.  The
@var{stores} @emph{array}, however, is copied, and so should be freed by
the caller.
@end deftypefun

@subsubsection @code{ileave} store
@cindex @code{ileave} store

@cindex RAID-0
@cindex striping, disk
@cindex disk striping
@cindex interleaving disks
@cindex disks, interleaving
@deftypevar {extern const struct store_class} store_ileave_class
This class provides a RAID-0@footnote{``RAID'' stands for @dfn{Redundant Array of
Independent Disks}: several disks used in
parallel to achieve increased capacity, redundancy and/or
performance.} storage mode (also called @dfn{disk striping}).  It
creates a new virtual store by interleaving the contents of several
different substores.

This RAID mode is designed to increase storage performance, since I/O
will probably occur in parallel if the substores reside on different
physical devices.  Interleaving works best with evenly-yoked
substores@dots{} if the stores are different sizes, some space will be
not be used at the end of the larger stores; if the stores are different
speeds, then I/O will have to wait for the slowest store; if some stores
are not as reliable as others, failures will wipe out every @var{n}th
storage block, where @var{n} is the number of substores.
@end deftypevar

@deftypefun error_t store_ileave_create (@w{struct store * const *@var{stripes}}, @w{size_t @w{num_stripes}}, @w{store_offset_t @var{interleave}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new store in @var{store} that interleaves all the stores in
@var{stripes} (@var{num_stripes} of them) every @var{interleave} bytes;
@var{interleave} must be an integer multiple of each stripe's block
size.  The stores in @var{stripes} are consumed; that is, they will be
freed when this store is freed.  The @var{stripes} @emph{array},
however, is copied, and so should be freed by the caller.
@end deftypefun

@subsubsection @code{mvol} store
@cindex @code{mvol} store

@deftypevar {extern const struct store_class} store_mvol_class
This store provides access to multiple volumes using a single-volume
device.  One use of this store would be to provide a store which
consists of multiple floppy disks when there is only a single disk
drive.  It works by remapping a single linear address range to multiple
address ranges, and keeping track of the currently active range.
Whenever a request maps to a range that is not active, a callback is
made in order to switch to the new range.

This class is not included in @var{store_std_classes}, because it
requires an application-specific callback.
@end deftypevar

@deftypefun error_t store_mvol_create (@w{struct store *@var{phys}}, error_t (*@var{swap_vols}) (@w{struct store *@var{store}}, @w{size_t @var{new_vol}}, @w{ssize_t @var{old_vol}}), @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new store in @var{store} that multiplexes multiple physical
volumes from @var{phys} as one larger virtual volume.  @var{swap_vols}
is a function that will be called whenever reads or writes refer to a
block which is not addressable on the currently active volume.
@var{phys} is consumed.
@end deftypefun

@subsubsection @code{remap} store
@pindex @code{remap} store

@deftypevar {extern const struct store_class} store_remap_class
This store translates I/O requests into different addresses on a
different store.
@end deftypevar

@deftypefun error_t store_remap_create (@w{struct store *@var{source}}, @w{const struct store_run *@var{runs}}, @w{size_t @var{num_runs}}, @w{int @var{flags}}, @w{struct store **@var{store}})
Return a new store in @var{store} that reflects the blocks in @var{runs}
and @var{runs_len} from @var{source}; @var{source} is consumed, but
@var{runs} is not.  Unlike the @code{store_remap} function, this
function always operates by creating a new store of type @samp{remap}
which has @var{source} as a child, and so may be less efficient than
@code{store_remap} for some types of stores.
@end deftypefun


@node Store RPC Encoding
@subsection Store RPC Encoding

The store library also provides some functions which help transfer
stores between tasks via RPC:

@deftypevr {Structure} struct store_enc
This structure is used to hold the various bits that make up the
representation of a store for transmission via RPC.  See
@code{<hurd/hurd_types.h>} for an explanation of the encodings for the
various storage types.
@end deftypevr

@deftypefun void store_enc_init (@w{struct store_enc *@var{enc}}, @w{mach_port_t *@var{ports}}, @w{mach_msg_type_number_t @var{num_ports}}, @w{int *@var{ints}}, @w{mach_msg_type_number_t @var{num_ints}}, @w{off_t *@var{offsets}}, @w{mach_msg_type_number_t @var{num_offsets}}, @w{char *@var{data}}, @w{mach_msg_type_number_t @var{data_len}})
Initialize @var{enc}.  The given vector and sizes will be used for the
encoding if they are big enough (otherwise new ones will be
automatically allocated).
@end deftypefun

@deftypefun void store_enc_dealloc (@w{struct store_enc *@var{enc}})
Deallocate storage used by the fields in @var{enc} (but nothing is done
with @var{enc} itself).
@end deftypefun

@deftypefun void store_enc_return (@w{struct store_enc *@var{enc}}, @w{mach_port_t **@var{ports}}, @w{mach_msg_type_number_t *@var{num_ports}}, @w{int **@var{ints}}, @w{mach_msg_type_number_t *@var{num_ints}}, @w{off_t **@var{offsets}}, @w{mach_msg_type_number_t *@var{num_offsets}}, @w{char **@var{data}}, @w{mach_msg_type_number_t *@var{data_len}})
Copy out the parameters from @var{enc} into the given variables suitably
for returning from a @code{file_get_storage_info} RPC, and deallocate
@var{enc}.
@end deftypefun

@deftypefun error_t store_return (@w{const struct store *@var{store}}, @w{mach_port_t **@var{ports}}, @w{mach_msg_type_number_t *@var{num_ports}}, @w{int **@var{ints}}, @w{mach_msg_type_number_t *@var{num_ints}}, @w{off_t **@var{offsets}}, @w{mach_msg_type_number_t *@var{num_offsets}}, @w{char **@var{data}}, @w{mach_msg_type_number_t *@var{data_len}})
Encode @var{store} into the given return variables, suitably for
returning from a @code{file_get_storage_info} RPC.
@end deftypefun

@deftypefun error_t store_encode (@w{const struct store *@var{store}}, @w{struct store_enc *@var{enc}})
Encode @var{store} into @var{enc}, which should have been prepared with
@code{store_enc_init}, or return an error.  The contents of @var{enc}
may then be returned as the value of @code{file_get_storage_info}; if
for some reason this can't be done, @code{store_enc_dealloc} may be used
to deallocate the memory used by the unsent vectors.
@end deftypefun

@deftypefun error_t store_decode (@w{struct store_enc *@var{enc}}, @w{const struct store_class *const *@var{classes}}, @w{struct store **@var{store}})
Decode @var{enc}, either returning a new store in @var{store}, or an
error.  @var{classes} is the mapping from Hurd storage class ids to store
classes; if it is zero, @var{store_std_classes} is used.  If nothing
else is to be done with @var{enc}, its contents may then be freed using
@code{store_enc_dealloc}.
@end deftypefun

@deftypefun error_t store_allocate_child_encodings (@w{const struct store *@var{store}}, @w{struct store_enc *@var{enc}})
Calls the @code{allocate_encoding} method in each child store of
@var{store}, propagating any errors.  If any child does not have such a
method, @code{EOPNOTSUPP} is returned.
@end deftypefun

@deftypefun error_t store_encode_children (@w{const struct store *@var{store}}, @w{struct store_enc *@var{enc}})
Calls the encode method in each child store of @var{store}, propagating
any errors.  If any child does not have such a method, @code{EOPNOTSUPP}
is returned.
@end deftypefun

@deftypefun error_t store_decode_children (@w{struct store_enc *@var{enc}}, @w{int @var{num_children}}, @w{const struct store_class *const *@var{classes}}, @w{struct store **@var{children}})
Decodes @var{num_children} from @var{enc}, storing the results into
successive positions in @var{children}.
@end deftypefun

@deftypefun error_t store_with_decoded_runs (@w{struct store_enc *@var{enc}}, @w{size_t @var{num_runs}}, error_t (*@var{fun}) (@w{const struct store_run *@var{runs}}, @w{size_t @var{num_runs}}))
Call @var{fun} with the vector @var{runs} of length @var{num_runs}
extracted from @var{enc}.
@end deftypefun

@deftypefun error_t store_std_leaf_allocate_encoding (@w{const struct store *@var{store}}, @w{struct store_enc *@var{enc}})
@deftypefunx error_t store_std_leaf_encode (@w{const struct store *@var{store}}, @w{struct store_enc *@var{enc}})
Standard encoding used for most data-providing (as opposed to filtering)
store classes.
@end deftypefun

@deftypefn {Typedef} typedef error_t (*store_std_leaf_create_t) (@w{mach_port_t @var{port}}, @w{int @var{flags}}, @w{size_t @var{block_size}}, @w{const struct store_run *@var{runs}}, @w{size_t @var{num_runs}}, @w{struct store **@var{store}})
Creation function used by @code{store_std_leaf_decode}.
@end deftypefn

@deftypefun error_t store_std_leaf_decode (@w{struct store_enc *@var{enc}}, @w{store_std_leaf_create_t @var{create}}, @w{struct store **@var{store}})
Decodes the standard leaf encoding which is common to various builtin
formats, and calls @var{create} to actually create the store.
@end deftypefun


@node Stored Filesystems
@chapter Stored Filesystems
@cindex disk-based filesystems
@cindex filesystems, disk-based

Stored filesystems allow users to save and load persistent data from any
random-access storage media, such as hard disks, floppy diskettes, and
CD-ROMs.  Stored filesystems are required for bootstrapping standalone
workstations, as well.

@menu
* Repairing Filesystems::       Recovering from minor filesystem crashes.
* Linux Extended 2 FS::         The popular Linux filesystem format.
* BSD Unix FS::                 The BSD Unix 4.x Fast File System.
* ISO-9660 CD-ROM FS::          Standard CD-ROM format.
* Diskfs Library::              Implementing new filesystem servers.
@end menu


@node Repairing Filesystems
@section Repairing Filesystems
@pindex fsck

FIXME: finish


@node Linux Extended 2 FS
@section Linux Extended 2 FS
@pindex ext2fs

FIXME: finish


@node BSD Unix FS
@section BSD Unix FS
@scindex ufs

FIXME: finish


@node ISO-9660 CD-ROM FS
@section ISO-9660 CD-ROM FS
@pindex iso9660fs

FIXME: finish


@node Diskfs Library
@section Diskfs Library
@scindex libdiskfs
@scindex diskfs.h

The diskfs library is declared in @code{<hurd/diskfs.h>}, and does a lot
of the work of implementing stored filesystems.  @code{libdiskfs}
requires the threads, ports, iohelp, fshelp, and store libraries.  You
should understand all these libraries before you attempt to use diskfs,
and you should also be familiar with the pager library (@pxref{Pager
Library}).

@scindex libstorefs
For historical reasons, the library for implementing stored filesystems
is called @code{libdiskfs} instead of @code{libstorefs}.  Keep in mind,
however, that diskfs is useful for filesystems which are implemented on
any block-addressed storage device, since it uses the store library to
do I/O.

Note that stored filesystems can be tricky to implement, since the
diskfs callback interfaces are not trivial.  It really is best if you
examine the source code of a similar existing filesystem server, and
follow its example rather than trying to write your own from scratch.

@menu
* Diskfs Startup::              Initializing stored filesystems.
* Diskfs Arguments::            Parsing command-line arguments.
* Diskfs Globals::              Global behaviour modification.
* Diskfs Node Management::      Allocation, reference counting, I/O,
                                  caching, and other disk node routines.
* Diskfs Callbacks::            Mandatory user-defined diskfs functions.
* Diskfs Options::              Optional user-defined diskfs functions.
* Diskfs Internals::            Reimplementing small pieces of diskfs.
@end menu


@node Diskfs Startup
@subsection Diskfs Startup

This subsection gives an outline of the general steps involved in
implementing a filesystem server, to help refresh your memory and to
offer explanations rather than to serve as a tutorial.

The first thing a filesystem server should do is parse its command-line
arguments (@pxref{Diskfs Arguments}).  Then, the standard output and
error streams should be redirected to the console, so that error
messages are not lost if this is the bootstrap filesystem:

@deftypefun void diskfs_console_stdio (void)
Redirect error messages to the console, so that they can be seen by
users.
@end deftypefun

The following is a list of the relevant functions which would be called
during the rest of the server initialization.  Again, you should refer
to the implementation of an already-working filesystem if you have any
questions about how these functions should be used:

@deftypefun error_t diskfs_init_diskfs (void)
Call this function after arguments have been parsed to initialize the
library.  You must call this before calling any other diskfs functions,
and after parsing diskfs options.
@end deftypefun

@deftypefun void diskfs_spawn_first_thread (void)
Call this after all format-specific initialization is done (except for
setting @code{diskfs_root_node}); at this point the pagers should be
ready to go.
@end deftypefun

@deftypefun mach_port_t diskfs_startup_diskfs (@w{mach_port_t @var{bootstrap}}, @w{int @var{flags}})
Call this once the filesystem is fully initialized, to advertise the new
filesystem control port to our parent filesystem.  If @var{bootstrap} is set,
diskfs will call @code{fsys_startup} on that port as appropriate and return
the @var{realnode} from that call; otherwise we call
@code{diskfs_start_bootstrap} and return @code{MACH_PORT_NULL}.
@var{flags} specifies how to open @var{realnode} (from the @code{O_*} set).
@end deftypefun

You should not need to call the following function directly, since
@code{diskfs_startup_diskfs} will do it for you, when appropriate:

@deftypefun void diskfs_start_bootstrap (void)
Start the Hurd bootstrap sequence as if we were the bootstrap filesystem
(that is, @code{diskfs_boot_flags} is nonzero).  All filesystem
initialization must be complete before you call this function.
@end deftypefun


@node Diskfs Arguments
@subsection Diskfs Arguments

The following functions implement standard diskfs command-line and
runtime argument parsing, using argp (@pxref{Argp, , , libc, The GNU C
Library Reference Manual}):

@deftypefun error_t diskfs_set_options (@w{char *@var{argz}}, @w{size_t @var{argz_len}})
Parse and execute the runtime options specified by @var{argz} and
@var{argz_len}.  @code{EINVAL} is returned if some option is
unrecognized.  The default definition of this routine will parse them
using @code{diskfs_runtime_argp}.
@end deftypefun

@deftypefun error_t diskfs_append_args (@w{char **@var{argz}}, @w{unsigned *@var{argz_len}})
Append to the malloced string @code{*@var{argz}} of length
@code{*@var{argz_len}} a NUL-separated list of the arguments to this
translator.  The default definition of this routine simply calls
@code{diskfs_append_std_options}.
@end deftypefun

@deftypefun error_t diskfs_append_std_options (@w{char **@var{argz}}, @w{unsigned *@var{argz_len}})
@emph{Appends} NUL-separated options describing the standard diskfs
option state to @var{argz} and increments @var{argz_len} appropriately.
Note that unlike @code{diskfs_get_options}, @var{argz} and
@var{argz_len} must already have sane values.
@end deftypefun

@deftypevar {struct argp *} diskfs_runtime_argp
If this is defined or set to an argp structure, it will be used by the
default @code{diskfs_set_options} to handle runtime option parsing.  The
default definition is initialized to a pointer to
@code{diskfs_std_runtime_argp}.
@end deftypevar

@deftypevar {const struct argp} diskfs_std_runtime_argp
An argp for the standard diskfs runtime options.  The default definition
of @code{diskfs_runtime_argp} points to this, although the user can
redefine that to chain this onto his own argp.
@end deftypevar

@deftypevar {const struct argp} diskfs_startup_argp
An argp structure for the standard diskfs command line arguments.  The
user may call @code{argp_parse} on this to parse the command line, chain
it onto the end of his own argp structure, or ignore it completely.
@end deftypevar

@deftypevar {const struct argp} diskfs_store_startup_argp
An argp structure for the standard diskfs command line arguments plus a
store specification.  The address of a location in which to return the
resulting @code{struct store_parsed} structure should be passed as the
input argument to @code{argp_parse}; FIXME xref the declaration for
STORE_ARGP.
@end deftypevar


@node Diskfs Globals
@subsection Diskfs Globals

The following functions and variables control the overall behaviour of
the library.  Your callback functions may need to refer to these, but
you should not need to modify or redefine them.

@deftypevar mach_port_t diskfs_default_pager
@deftypevarx mach_port_t diskfs_exec_ctl
@deftypevarx mach_port_t diskfs_exec
@deftypevarx auth_t diskfs_auth_server_port
These are the respective send rights to the default pager, execserver
control port, execserver itself, and authserver.
@end deftypevar

@deftypevar mach_port_t diskfs_fsys_identity
The @code{io_identity} identity port for the filesystem.
@end deftypevar

@deftypevar {char **} diskfs_argv
The command line with which diskfs was started, set by the default argument parser.
If you don't use it, set this yourself.  This is only used for bootstrap
file systems, to give the procserver.
@end deftypevar

@deftypevar {char *} diskfs_boot_flags
When this is a bootstrap filesystem, the command line options passed from
the kernel.  If not a bootstrap filesystem, it is zero, so it can be used to
distinguish between the two cases.
@end deftypevar

@deftypevar {struct rwlock} diskfs_fsys_lock
Hold this lock while doing filesystem-level operations.  Innocuous users
can just hold a reader lock, but operations that might corrupt other
threads should hold a writer lock.
@end deftypevar

@deftypevar {volatile struct mapped_time_value *} diskfs_mtime
The current system time, as used by the diskfs routines.  This is
converted into a @code{struct timeval} by the @code{maptime_read}
C library function (FIXME xref).
@end deftypevar

@deftypevar int diskfs_synchronous
True if and only if we should do every operation synchronously.  It
is the format-specific code's responsibility to keep allocation
information permanently in sync if this is set; the rest will
be done by format-independent code.
@end deftypevar

@deftypefun error_t diskfs_set_sync_interval (@w{int @var{interval}})
Establish a thread to sync the filesystem every @var{interval} seconds,
or never, if @var{interval} is zero.  If an error occurs creating the
thread, it is returned, otherwise zero.  Subsequent calls will create a
new thread and (eventually) get rid of the old one; the old thread won't
do any more syncs, regardless.
@end deftypefun

@deftypevar spin_lock_t diskfs_node_refcnt_lock
Pager reference count lock.
@end deftypevar

@deftypevar int diskfs_readonly
Set to zero if the filesystem is currently writable.
@end deftypevar

@deftypefun error_t diskfs_set_readonly (@w{int @var{readonly}})
Change an active filesystem between read-only and writable modes,
setting the global variable @var{diskfs_readonly} to reflect the current
mode.  If an error is returned, nothing will have changed.
@var{diskfs_fsys_lock} should be held while calling this routine.
@end deftypefun

@deftypefun int diskfs_check_readonly (void)
Check if the filesystem is readonly before an operation that writes it.
Return nonzero if readonly, otherwise zero.
@end deftypefun

@deftypefun error_t diskfs_remount (void)
Reread all in-core data structures from disk.  This function can only be
successful if @var{diskfs_readonly} is true.  @var{diskfs_fsys_lock}
should be held while calling this routine.
@end deftypefun

@deftypefun error_t diskfs_shutdown (@w{int @var{flags}})
Shutdown the filesystem; @var{flags} are as for @code{fsys_shutdown}.
@end deftypefun


@node Diskfs Node Management
@subsection Diskfs Node Management

Every file or directory is a diskfs @dfn{node}.  The following functions
help your diskfs callbacks manage nodes and their references:

@deftypefun void diskfs_drop_node (@w{struct node *@var{np}})
Node @var{np} now has no more references; clean all state.  The
@var{diskfs_node_refcnt_lock} must be held, and will be released upon
return.  @var{np} must be locked.
@end deftypefun

@deftypefun void diskfs_node_update (@w{struct node *@var{np}}, @w{int @var{wait}})
Set disk fields from @code{@var{np}->dn_stat}; update ctime, atime, and mtime
if necessary.  If @var{wait} is true, then return only after the
physical media has been completely updated.
@end deftypefun

@deftypefun void diskfs_nref (@w{struct node *@var{np}})
Add a hard reference to node @var{np}.  If there were no hard references
previously, then the node cannot be locked (because you must hold a hard
reference to hold the lock).
@end deftypefun

@deftypefun void diskfs_nput (@w{struct node *@var{np}})
Unlock node @var{np} and release a hard reference; if this is the last
hard reference and there are no links to the file then request light
references to be dropped.
@end deftypefun

@deftypefun void diskfs_nrele (@w{struct node *@var{np}})
Release a hard reference on @var{np}.  If @var{np} is locked by anyone,
then this cannot be the last hard reference (because you must hold a
hard reference in order to hold the lock).  If this is the last hard
reference and there are no links, then request light references to be
dropped.
@end deftypefun

@deftypefun void diskfs_nref_light (@w{struct node *@var{np}})
Add a light reference to a node.
@end deftypefun

@deftypefun void diskfs_nput_light (@w{struct node *@var{np}})
Unlock node @var{np} and release a light reference.
@end deftypefun

@deftypefun void diskfs_nrele_light (@w{struct node *@var{np}})
Release a light reference on @var{np}.  If @var{np} is locked by anyone,
then this cannot be the last reference (because you must hold a hard
reference in order to hold the lock).
@end deftypefun

@deftypefun error_t diskfs_node_rdwr (@w{struct node *@var{np}}, @w{char *@var{data}}, @w{off_t @var{off}}, @w{size_t @var{amt}}, @w{int @var{direction}}, @w{struct protid *@var{cred}}, @w{size_t *@var{amtread}})
This is called by other filesystem routines to read or write files, and
extends them automatically, if necessary.  @var{np} is the node to be
read or written, and must be locked.  @var{data} will be written or
filled.  @var{off} identifies where in the file the I/O is to take place
(negative values are not allowed).  @var{amt} is the size of @var{data}
and tells how much to copy.  @var{dir} is zero for reading or nonzero
for writing.  @var{cred} is the user doing the access (only used to
validate attempted file extension).  For reads, @code{*@var{amtread}} is
filled with the amount actually read.
@end deftypefun

@deftypefun void diskfs_notice_dirchange (@w{struct node *@var{dp}}, @w{enum dir_changed_type @var{type}}, @w{char *@var{name}})
Send notifications to users who have requested them for directory
@var{dp} with @code{dir_notice_changes}.  The type of modification and
affected name are @var{type} and @var{name} respectively.  This should
be called by @code{diskfs_direnter}, @code{diskfs_dirremove},
@code{diskfs_dirrewrite}, and anything else that changes the directory,
after the change is fully completed.
@end deftypefun

@deftypefun {struct node *} diskfs_make_node (@w{struct disknode *@var{dn}})
Create a new node structure with @var{ds} as its physical disknode.  The
new node will have one hard reference and no light references.
@end deftypefun

@c FIXME: It's odd that `hard' and `light' seem to be opposites when
@c we're talking about references.  Or is `weak' the opposite of `hard'?
@c These terms need to be explained.
@c  tb: hard is opposite to both light and weak, but we don't use both
@c  tb: light and weak in the same context, so it's ok.

These next node manipulation functions are not generally useful, but may
come in handy if you need to redefine any diskfs functions.

@deftypefun error_t diskfs_create_node (@w{struct node *@var{dir}}, @w{char *@var{name}}, @w{mode_t @var{mode}}, @w{struct node **@var{newnode}}, @w{struct protid *@var{cred}}, @w{struct dirstat *@var{ds}})
Create a new node.  Give it @var{mode}: if @var{mode} includes
@code{IFDIR}, also initialize @file{.} and @file{..} in the new
directory.  Return the node in @var{npp}.  @var{cred} identifies the
user responsible for the call.  If @var{name} is nonzero, then link the
new node into @var{dir} with name @var{name}; @var{ds} is the result of
a prior @code{diskfs_lookup} for creation (and @var{dir} has been held
locked since).  @var{dir} must always be provided as at least a hint for
disk allocation strategies.
@end deftypefun

@deftypefun void diskfs_set_node_times (@w{struct node *@var{np}})
If @code{@var{np}->dn_set_ctime} is set, then modify
@code{@var{np}->dn_stat.st_ctime} appropriately; do the analogous
operations for atime and mtime as well.
@end deftypefun

@deftypefun {struct node *} diskfs_check_lookup_cache (@w{struct node *@var{dir}}, @w{char *@var{name}})
Scan the cache looking for @var{name} inside @var{dir}.  If we don't
know any entries at all, then return zero.  If the entry is confirmed to
not exist, then return -1.  Otherwise, return @var{np} for the entry,
with a newly-allocated reference.
@end deftypefun

@deftypefun error_t diskfs_cached_lookup (@w{int @var{cache_id}}, @w{struct node **@var{npp}})
Return the node corresponding to @var{cache_id} in @code{*@var{npp}}.
@end deftypefun

@deftypefun void diskfs_enter_lookup_cache (@w{struct node *@var{dir}}, @w{struct node *@var{np}}, @w{char *@var{name}})
Node @var{np} has just been found in @var{dir} with @var{name}.  If
@var{np} is null, that means that this name has been confirmed as absent
in the directory.
@end deftypefun

@deftypefun void diskfs_purge_lookup_cache (@w{struct node *@var{dp}}, @w{struct node *@var{np}})
Purge all references in the cache to @var{np} as a node inside directory
@var{dp}.
@end deftypefun


@node Diskfs Callbacks
@subsection Diskfs Callbacks

Like several other Hurd libraries, @code{libdiskfs} depends on you to
implement application-specific callback functions.  You @emph{must}
define the following functions and variables, but you should also look
at @ref{Diskfs Options}, as there are several defaults which should be
modified to provide good filesystem support:

@deftypevr {Structure} struct dirstat
You must define this type, which will hold information between a call to
@code{diskfs_lookup} and a call to one of @code{diskfs_direnter},
@code{diskfs_dirremove}, or @code{diskfs_dirrewrite}.  It must contain
enough information so that those calls work as described below.
@end deftypevr

@deftypevar const size_t diskfs_dirstat_size
This must be the size in bytes of a @code{struct dirstat}.
@end deftypevar

@deftypevar int diskfs_link_max
This is the maximum number of links to any one file, which must be a
positive integer.  The implementation of @code{dir_rename} does not know
how to succeed if this is only one allowed link; on such formats you
need to reimplement @code{dir_rename} yourself.
@end deftypevar

@deftypevar int diskfs_maxsymlinks
This variable is a positive integer which is the maximum number of
symbolic links which can be traversed within a single call to
@code{dir_lookup}.  If this is exceeded, @code{dir_lookup} will
return @code{ELOOP}.
@end deftypevar

@deftypevar {struct node *} diskfs_root_node
Set this to be the node of the root of the filesystem.
@end deftypevar

@deftypevar {char *} diskfs_server_name
Set this to the name of the filesystem server.
@end deftypevar

@deftypevar {char *} diskfs_server_version
Set this to be the server version string.
@end deftypevar

@deftypevar {char *} diskfs_disk_name
This should be a string that somehow identifies the particular disk this
filesystem is interpreting.  It is generally only used to print messages
or to distinguish instances of the same filesystem type from one
another.  If this filesystem accesses no external media, then define
this to be zero.
@end deftypevar

@deftypefun error_t diskfs_set_statfs (@w{fsys_statfsbuf_t *@var{statfsbuf}})
Set @code{*@var{statfsbuf}} with appropriate values to reflect the
current state of the filesystem.
@end deftypefun

@deftypefun error_t diskfs_lookup (@w{struct node *@var{dp}}, @w{char *@var{name}}, @w{enum lookup_type @var{type}}, @w{struct node **@var{np}}, @w{struct dirstat *@var{ds}}, @w{struct protid *@var{cred}})
@deftypefunx error_t diskfs_lookup_hard (@w{struct node *@var{dp}}, @w{char *@var{name}}, @w{enum lookup_type @var{type}}, @w{struct node **@var{np}}, @w{struct dirstat *@var{ds}}, @w{struct protid *@var{cred}})
You should not define @code{diskfs_lookup}, because it is simply a
wrapper for @code{diskfs_lookup_hard}, and is already defined in
@code{libdiskfs}.

Lookup in directory @var{dp} (which is locked) the name @var{name}.
@var{type} will either be @code{LOOKUP}, @code{CREATE}, @code{RENAME},
or @code{REMOVE}.  @var{cred} identifies the user making the call.

If the name is found, return zero, and (if @var{np} is nonzero) set
@code{*@var{np}} to point to the node for it, which should be locked.
If the name is not found, return @code{ENOENT}, and (if @var{np} is
nonzero) set @code{*@var{np}} to zero.  If @var{np} is zero, then the
node found must not be locked, not even transitorily.  Lookups for
@code{REMOVE} and @code{RENAME} (which must often check permissions on
the node being found) will always set @var{np}.

If @var{ds} is nonzero then the behaviour varies depending on the
requested lookup @var{type}:

@table @code
@item LOOKUP
Set @code{*@var{ds}} to be ignored by @code{diskfs_drop_dirstat}

@item CREATE
On success, set @code{*@var{ds}} to be ignored by
@code{diskfs_drop_dirstat}. @*
On failure, set @code{*@var{ds}} for a future call to
@code{diskfs_direnter}.

@item RENAME
On success, set @code{*@var{ds}} for a future call to
@code{diskfs_dirrewrite}. @*
On failure, set @code{*@var{ds}} for a future call to
@code{diskfs_direnter}.

@item REMOVE
On success, set @code{*@var{ds}} for a future call to
@code{diskfs_dirremove}. @*
On failure, set @code{*@var{ds}} to be ignored by
@code{diskfs_drop_dirstat}.
@end table

The caller of this function guarantees that if @var{ds} is nonzero, then
either the appropriate call listed above or @code{diskfs_drop_dirstat}
will be called with @var{ds} before the directory @var{dp} is unlocked,
and guarantees that no lookup calls will be made on this directory
between this lookup and the use (or destruction) of *DS.

If you use the library's versions of @code{diskfs_rename_dir},
@code{diskfs_clear_directory}, and @code{diskfs_init_dir}, then lookups
for @file{..} might have the flag @code{SPEC_DOTDOT} ORed in.  This has a
special meaning depending on the requested lookup @var{type}:

@table @code
@item LOOKUP
@var{dp} should be unlocked and its reference dropped before returning.

@item CREATE
Ignore this case, because @code{SPEC_DOTDOT} is guaranteed not to be
given.

@item RENAME
@itemx REMOVE
In both of these cases, the node being found (@code{*@var{np}}) is
already held locked, so don't lock it or add a reference to it.
@end table

Return @code{ENOENT} if @var{name} isn't in the directory.  Return
@code{EAGAIN} if @var{name} refers to the @file{..} of this filesystem's
root.  Return @code{EIO} if appropriate.
@end deftypefun

@deftypefun error_t diskfs_direnter (@w{struct node *@var{dp}}, @w{char *@var{name}}, @w{struct node *@var{np}}, @w{struct dirstat *@var{ds}}, @w{struct protid *@var{cred}})
@deftypefunx error_t diskfs_direnter_hard (@w{struct node *@var{dp}}, @w{char *@var{name}}, @w{struct node *@var{np}}, @w{struct dirstat *@var{ds}}, @w{struct protid *@var{cred}})
You should not define @code{diskfs_direnter}, because it is simply a
wrapper for @code{diskfs_direnter_hard}, and is already defined in
@code{libdiskfs}.

Add @var{np} to directory @var{dp} under the name @var{name}.  This will
only be called after an unsuccessful call to @code{diskfs_lookup} of type
@code{CREATE} or @code{RENAME}; @var{dp} has been locked continuously
since that call and @var{ds} is as that call set it, @var{np} is locked.
@var{cred} identifies the user responsible for the call (to be used only
to validate directory growth).
@end deftypefun

@deftypefun error_t diskfs_dirrewrite (@w{struct node *@var{dp}}, @w{struct node *@var{oldnp}}, @w{struct node *@var{np}}, @w{char *@var{name}}, @w{struct dirstat *@var{ds}})
@deftypefunx error_t diskfs_dirrewrite_hard (@w{struct node *@var{dp}}, @w{struct node *@var{np}}, @w{struct dirstat *@var{ds}})
You should not define @code{diskfs_dirrewrite}, because it is simply a
wrapper for @code{diskfs_dirrewrite_hard}, and is already defined in
@code{libdiskfs}.

This will only be called after a successful call to @code{diskfs_lookup}
of type @code{RENAME}; this call should change the name found in
directory @var{dp} to point to node @var{np} instead of its previous
referent.  @var{dp} has been locked continuously since the call to
@code{diskfs_lookup} and @var{ds} is as that call set it; @var{np} is
locked.

@code{diskfs_dirrewrite} has some additional specifications: @var{name}
is the name within @var{dp} which used to correspond to the previous
referent, @var{oldnp}; it is this reference which is being rewritten.
@code{diskfs_dirrewrite} also calls @code{diskfs_notice_dirchange} if
@code{@var{dp}->dirmod_reqs} is nonzero.
@end deftypefun

@deftypefun error_t diskfs_dirremove (@w{struct node *@var{dp}}, @w{struct node *@var{np}}, @w{char *@var{name}}, @w{struct dirstat *@var{ds}})
@deftypefunx error_t diskfs_dirremove_hard (@w{struct node *@var{dp}}, @w{struct dirstat *@var{ds}})
You should not define @code{diskfs_dirremove}, because it is simply a
wrapper for @code{diskfs_dirremove_hard}, and is already defined in
@code{libdiskfs}.

This will only be called after a successful call to @code{diskfs_lookup}
of type @code{REMOVE}; this call should remove the name found from the
directory @var{ds}.  @var{dp} has been locked continuously since the
call to @code{diskfs_lookup} and @var{ds} is as that call set it.

@code{diskfs_dirremove} has some additional specifications: this routine
should call @code{diskfs_notice_dirchange} if
@code{@var{dp}->dirmod_reqs} is nonzero.  The entry being removed has
name @var{name} and refers to @var{np}.
@end deftypefun

@deftypefun error_t diskfs_drop_dirstat (@w{struct node *@var{dp}}, @w{struct dirstat *@var{ds}})
@var{ds} has been set by a previous call to @code{diskfs_lookup} on
directory @var{dp}; this function is guaranteed to be called if
@code{diskfs_direnter}, @code{diskfs_dirrewrite}, and
@code{diskfs_dirremove} have not been called, and should free any state
retained by a @code{struct dirstat}.  @var{dp} has been locked
continuously since the call to @code{diskfs_lookup}.
@end deftypefun

@deftypefun void diskfs_null_dirstat (@w{struct dirstat *@var{ds}})
Initialize @var{ds} such that @code{diskfs_drop_dirstat} will ignore it.
@end deftypefun

@deftypefun error_t diskfs_get_directs (@w{struct node *@var{dp}}, @w{int @var{entry}}, @w{int @var{n}}, @w{char **@var{data}}, @w{u_int *@var{datacnt}}, @w{vm_size_t @var{bufsiz}}, @w{int *@var{amt}})
Return @var{n} directory entries starting at @var{entry} from locked
directory node @var{dp}.  Fill @code{*@var{data}} with the entries;
which currently points to @code{*@var{datacnt}} bytes.  If it isn't big
enough, @code{vm_allocate} into @code{*@var{data}}.  Set
@code{*@var{datacnt}} with the total size used.  Fill @var{amt} with the
number of entries copied.  Regardless, never copy more than @var{bufsiz}
bytes.  If @var{bufsiz} is zero, then there is no limit on
@code{*@var{datacnt}}; if @var{n} is -1, then there is no limit on
@var{amt}.
@end deftypefun

@deftypefun int diskfs_dirempty (@w{struct node *@var{dp}}, @w{struct protid *@var{cred}})
Return nonzero if locked directory @var{dp} is empty.  If the user has
not redefined @code{diskfs_clear_directory} and
@code{diskfs_init_directory}, then `empty' means `only possesses entries
labelled @file{.} and @file{..}.  @var{cred} identifies the user making
the call@dots{} if this user cannot search the directory, then this
routine should fail.
@end deftypefun

@deftypefun error_t diskfs_get_translator (@w{struct node *@var{np}}, @w{char **@var{namep}}, @w{u_int *@var{namelen}})
For locked node @var{np} (for which @code{diskfs_node_translated} is
true) look up the name of its translator.  Store the name into newly
malloced storage and set @code{*@var{namelen}} to the total length.
@end deftypefun

@deftypefun error_t diskfs_set_translator (@w{struct node *@var{np}}, @w{char *@var{name}}, @w{u_int @var{namelen}}, @w{struct protid *@var{cred}})
For locked node @var{np}, set the name of the translating program to be
@var{name}, which is @var{namelen} bytes long.  @var{cred} identifies
the user responsible for the call.
@end deftypefun

@deftypefun error_t diskfs_truncate (@w{struct node *@var{np}}, @w{off_t @var{size}})
Truncate locked node @var{np} to be @var{size} bytes long.  If @var{np}
is already less than or equal to @var{size} bytes long, do nothing.  If
this is a symlink (and @code{diskfs_shortcut_symlink} is set) then this
should clear the symlink, even if @code{diskfs_create_symlink_hook}
stores the link target elsewhere.
@end deftypefun

@deftypefun error_t diskfs_grow (@w{struct node *@var{np}}, @w{off_t @var{size}}, @w{struct protid *@var{cred}})
Grow the disk allocated to locked node @var{np} to be at least
@var{size} bytes, and set @code{@var{np}->allocsize} to the actual
allocated size.  If the allocated size is already @var{size} bytes, do
nothing.  @var{cred} identifies the user responsible for the call.
@end deftypefun

@deftypefun error_t diskfs_node_reload (@w{struct node *@var{node}})
This function must reread all data specific to @var{node} from disk,
without writing anything.  It is always called with
@var{diskfs_readonly} set to true.
@end deftypefun

@deftypefun error_t diskfs_reload_global_state (void)
This function must invalidate all cached global state, and reread it as
necessary from disk, without writing anything.  It is always called with
@var{diskfs_readonly} set to true.  @code{diskfs_node_reload} is
subsequently called on all active nodes, so this call doesn't need to
reread any node-specific data.
@end deftypefun

@deftypefun error_t diskfs_node_iterate (error_t (*@var{fun}) (@w{struct node *@var{np}}))
For each active node @var{np}, call @var{fun}.  The node is to be locked
around the call to @var{fun}.  If @var{fun} returns nonzero for any
node, then stop immediately, and return that value.
@end deftypefun

@deftypefun error_t diskfs_alloc_node (@w{struct node *@var{dp}}, @w{mode_t @var{mode}}, @w{struct node **@var{np}})
Allocate a new node to be of mode @var{mode} in locked directory
@var{dp}, but don't actually set the mode or modify the directory, since
that will be done by the caller.  The user responsible for the request
can be identified with @var{cred}.  Set @code{*@var{np}} to be the newly
allocated node.
@end deftypefun

@deftypefun void diskfs_free_node (@w{struct node *@var{np}}, @w{mode_t @var{mode}})
Free node @var{np}; the on-disk copy has already been synchronized with
@code{diskfs_node_update} (where @code{@var{np}->dn_stat.st_mode} was
zero).  @var{np}'s mode used to be @var{mode}.
@end deftypefun

@deftypefun void diskfs_lost_hardrefs (@w{struct node *@var{np}})
Locked node @var{np} has some light references but has just lost its
last hard reference.
@end deftypefun

@deftypefun void diskfs_new_hardrefs (@w{struct node *@var{np}})
Locked node @var{np} has just acquired a hard reference where it had
none previously.  Therefore, it is okay again to have light references
without real users.
@end deftypefun

@deftypefun void diskfs_try_dropping_softrefs (@w{struct node *@var{np}})
Node @var{np} has some light references, but has just lost its last hard
references.  Take steps so that if any light references can be freed,
they are.  Both @var{diskfs_node_refcnt_lock} and @var{np} are locked.
This function will be called after @code{diskfs_lost_hardrefs}.
@end deftypefun

@deftypefun void diskfs_node_norefs (@w{struct node *@var{np}})
Node @var{np} has no more references; free local state, including
@code{*@var{np}} if it shouldn't be retained.
@var{diskfs_node_refcnt_lock} is held.
@end deftypefun

@deftypefun error_t diskfs_set_hypermetadata (@w{int @var{wait}}, @w{int @var{clean}})
Write any non-paged metadata from format-specific buffers to disk,
asynchronously unless @var{wait} is nonzero.  If @var{clean} is nonzero,
then after this is written the filesystem will be absolutely clean, and
it must be possible for the non-paged metadata to indicate that fact.
@end deftypefun

@deftypefun void diskfs_write_disknode (@w{struct node *@var{np}}, @w{int @var{wait}})
Write the information in @code{@var{np}->dn_stat} and any associated
format-specific information to the disk.  If @var{wait} is true, then
return only after the physical media has been completely updated.
@end deftypefun

@deftypefun void diskfs_file_update (@w{struct node *@var{np}}, @w{int @var{wait}})
Write the contents and all associated metadata of file NP to disk.
Generally, this will involve calling @code{diskfs_node_update} for much
of the metadata.  If @var{wait} is true, then return only after the
physical media has been completely updated.
@end deftypefun

@deftypefun mach_port_t diskfs_get_filemap (@w{struct node *@var{np}}, @w{vm_prot_t @var{prot}})
Return a memory object port (send right) for the file contents of
@var{np}.  @var{prot} is the maximum allowable access.  On errors,
return @code{MACH_PORT_NULL} and set @code{errno}.
@end deftypefun

@deftypefun {struct pager *} diskfs_get_filemap_pager_struct (@w{struct node *@var{np}})
Return a @code{struct pager *} that refers to the pager returned by
diskfs_get_filemap for locked node NP, suitable for use as an argument
to @code{pager_memcpy}.
@end deftypefun

@deftypefun vm_prot_t diskfs_max_user_pager_prot (void)
Return the bitwise OR of the maximum @code{prot} parameter (the second
argument to @code{diskfs_get_filemap}) for all active user pagers.
@end deftypefun

@deftypefun int diskfs_pager_users (void)
Return nonzero if there are pager ports exported that might be in use by
users.  Further pager creation should be blocked before this function
returns zero.
@end deftypefun

@deftypefun void diskfs_sync_everything (@w{int @var{wait}})
Sync all the pagers and write any data belonging on disk except for the
hypermetadata.  If @var{wait} is true, then return only after the
physical media has been completely updated.
@end deftypefun

@deftypefun void diskfs_shutdown_pager (void)
Shut down all pagers.  This is irreversible, and is done when the
filesystem is exiting.
@end deftypefun


@node Diskfs Options
@subsection Diskfs Options

The functions and variables described in this subsection already have
default definitions in @code{libdiskfs}, so you are not forced to define
them; rather, they may be redefined on a case-by-case basis.

You should set the values of any option variables as soon as your program
starts (before you make any calls to diskfs, such as argument parsing).

@deftypevar int diskfs_hard_readonly
You should set this variable to nonzero if the filesystem media can
never be made writable.
@end deftypevar

@deftypevar {char *} diskfs_extra_version
Set this to be any additional version specification that should be
printed for --version.
@end deftypevar

@deftypevar int diskfs_shortcut_symlink
This should be nonzero if and only if the filesystem format supports
shortcutting symbolic link translation.  The library guarantees that
users will not be able to read or write the contents of the node
directly, and the library will only do so if the symlink hook functions
(@code{diskfs_create_symlink_hook} and @code{diskfs_read_symlink_hook})
return @code{EINVAL} or are not defined.  The library knows that the
@code{dn_stat.st_size} field is the length of the symlink, even if the
hook functions are used.
@end deftypevar

@deftypevar int diskfs_shortcut_chrdev
@deftypevarx int diskfs_shortcut_blkdev
@deftypevarx int diskfs_shortcut_fifo
@deftypevarx int diskfs_shortcut_ifsock
These variables should be nonzero if and only if the filesystem format
supports shortcutting character device node, block device node, FIFO, or
Unix-domain socket translation, respectively.
@end deftypevar

@deftypevar int diskfs_default_sync_interval
@code{diskfs_set_sync_interval} is called with this value when the first
diskfs thread is started up (in @code{diskfs_spawn_first_thread}).  This
variable has a default default value of 30, which causes disk buffers to
be flushed at least every 30 seconds.
@end deftypevar

@deftypefun error_t diskfs_validate_mode_change (@w{struct node *@var{np}}, @w{mode_t @var{mode}})
@deftypefunx error_t diskfs_validate_owner_change (@w{struct node *@var{np}}, @w{uid_t @var{uid}})
@deftypefunx error_t diskfs_validate_group_change (@w{struct node *@var{np}}, @w{gid_t @var{gid}})
@deftypefunx error_t diskfs_validate_author_change (@w{struct node *@var{np}}, @w{uid_t @var{author}})
@deftypefunx error_t diskfs_validate_flags_change (@w{struct node *@var{np}}, @w{int @var{flags}})
@deftypefunx error_t diskfs_validate_rdev_change (@w{struct node *@var{np}}, @w{dev_t @var{rdev}})
Return zero if for the node @var{np} can be changed as requested.  That
is, if @var{np}'s mode can be changed to @var{mode}, owner to @var{uid},
group to @var{gid}, author to @var{author}, flags to @var{flags}, or raw
device number to @var{rdev}, respectively.  Otherwise, return an error
code.

It must always be possible to clear the mode or the flags; diskfs will
not ask for permission before doing so.
@end deftypefun

@deftypefun void diskfs_readonly_changed (@w{int @var{readonly}})
This is called when the disk has been changed from read-only to
read-write mode or vice-versa.  @var{readonly} is the new state (which
is also reflected in @var{diskfs_readonly}).  This function is also
called during initial startup if the filesystem is to be writable.
@end deftypefun

@deftypefn {Variable} error_t (*diskfs_create_symlink_hook) (@w{struct node *@var{np}}, @w{char *@var{target}})
If this function pointer is nonzero (and @code{diskfs_shortcut_symlink}
is set) it is called to set a symlink.  If it returns @code{EINVAL} or
isn't set, then the normal method (writing the contents into the file
data) is used.  If it returns any other error, it is returned to the
user.
@end deftypefn

@deftypefn {Variable} error_t (*diskfs_read_symlink_hook) (@w{struct node *@var{np}}, @w{char *@var{target}})
If this function pointer is nonzero (and @code{diskfs_shortcut_symlink}
is set) it is called to read the contents of a symlink.  If it returns
@code{EINVAL} or isn't set, then the normal method (reading from the
file data) is used.  If it returns any other error, it is returned to
the user.
@end deftypefn

@deftypefun error_t diskfs_rename_dir (@w{struct node *@var{fdp}}, @w{struct node *@var{fnp}}, @w{char *@var{fromname}}, @w{struct node *@var{tdp}}, @w{char *@var{toname}}, @w{struct protid *@var{fromcred}}, @w{struct protid *@var{tocred}})
Rename directory node @var{fnp} (whose parent is @var{fdp}, and which
has name @var{fromname} in that directory) to have name @var{toname}
inside directory @var{tdp}.  None of these nodes are locked, and none
should be locked upon return.  This routine is serialized, so it doesn't
have to be reentrant.  Directories will never be renamed except by this
routine.  @var{fromcred} is the user responsible for @var{fdp} and
@var{fnp}.  @var{tocred} is the user responsible for @var{tdp}.  This
routine assumes the usual convention where @file{.}  and @file{..} are
represented by ordinary links; if that is not true for your format, you
have to redefine this function.
@end deftypefun

@deftypefun error_t diskfs_clear_directory (@w{struct node *@var{dp}}, @w{struct node *@var{pdp}}, @w{struct protid *@var{cred}})
Clear the @file{.} and @file{..} entries from directory @var{dp}.  Its
parent is @var{pdp}, and the user responsible for this is identified by
@var{cred}.  Both directories must be locked.  This routine assumes the
usual convention where @file{.} and @file{..} are represented by
ordinary links; if that is not true for your format, you have to
redefine this function.
@end deftypefun

@deftypefun error_t diskfs_init_dir (@w{struct node *@var{dp}}, @w{struct node *@var{pdp}}, @w{struct protid *@var{cred}})
Locked node @var{dp} is a new directory; add whatever links are
necessary to give it structure; its parent is the (locked) node
@var{pdp}.  This routine may not call @code{diskfs_lookup} on @var{pdp}.
The new directory must be clear within the meaning of
@code{diskfs_dirempty}.  This routine assumes the usual convention where
@file{.} and @file{..} are represented by ordinary links; if that is not
true for your format, you have to redefine this function.  @var{cred}
identifies the user making the call.
@end deftypefun


@node Diskfs Internals
@subsection Diskfs Internals

The library also exports the following functions, but they are not
generally useful unless you are redefining other functions the library
provides.

@deftypefun error_t diskfs_create_protid (@w{struct peropen *@var{po}}, @w{struct iouser *@var{user}}, @w{struct protid **@var{cred}})
Create and return a protid for an existing peropen @var{po} in
@var{cred}, referring to user @var{user}.  The node @code{@var{po}->np}
must be locked.
@end deftypefun

@deftypefun error_t diskfs_start_protid (@w{struct peropen *@var{po}}, @w{struct protid **@var{cred}})
Build and return in @var{cred} a protid which has no user
identification, for peropen @var{po}.  The node @code{@var{po}->np} must
be locked.
@end deftypefun

@deftypefun void diskfs_finish_protid (@w{struct protid *@var{cred}}, @w{struct iouser *@var{user}})
Finish building protid @var{cred} started with @code{diskfs_start_protid};
the user to install is @var{user}.
@end deftypefun

@deftypefun void diskfs_protid_rele (@w{void *@var{arg}})
Called when a protid @var{cred} has no more references.  Because
references to protids are maintained by the port management library,
this is installed in the clean routines list.  The ports library will
free the structure.
@end deftypefun

@deftypefun {struct peropen *} diskfs_make_peropen (@w{struct node *@var{np}}, @w{int @var{flags}}, @w{struct peropen *@var{context}})
Create and return a new peropen structure on node @var{np} with open
flags @var{flags}.  The initial values for the @code{root_parent},
@code{shadow_root}, and @code{shadow_root_parent} fields are copied from
@var{context} if it is nonzero, otherwise each of these values are
set to zero.
@end deftypefun

@deftypefun void diskfs_release_peropen (@w{struct peropen *@var{po}})
Decrement the reference count on @var{po}.
@end deftypefun

@deftypefun error_t diskfs_execboot_fsys_startup (@w{mach_port_t @var{port}}, @w{int @var{flags}}, @w{mach_port_t @var{ctl}}, @w{mach_port_t *@var{real}}, @w{mach_msg_type_name_t *@var{realpoly}})
This function is called by @code{S_fsys_startup} for execserver
bootstrap.  The execserver is able to function without a real node,
hence this fraud.  Arguments are as for @code{fsys_startup} in
@code{<hurd/fsys.defs>}.
@end deftypefun

@deftypefun int diskfs_demuxer (@w{mach_msg_header_t *@var{inp}}, @w{mach_msg_header_t *@var{outp}})
Demultiplex incoming @code{libports} messages on diskfs ports.
@end deftypefun

@findex diskfs_S_*
The diskfs library also provides functions to demultiplex the fs, io,
fsys, interrupt, and notify interfaces.  All the server routines have
the prefix @code{diskfs_S_}.  For those routines, @code{in} arguments of
type @code{file_t} or @code{io_t} appear as @code{struct protid *} to
the stub.


@node Twisted Filesystems
@chapter Twisted Filesystems

In the Hurd, translators are capable of redirecting filesystem requests
to other translators, which makes it possible to implement alternative
views of the same underlying data.  The translators described in this
chapter do not provide direct access to any data; rather, they are
organizational tools to help you simplify an existing physical
filesystem layout.

Be prudent with these translators: you may accidentally injure people
who want their filesystems to be rigidly tree-structured.@footnote{You
are lost in a maze of twisty little filesystems, all alike@dots{}.}

FIXME: finish

@section symlink, firmlink
@section hostmux, usermux
@section shadowfs


@node Distributed Filesystems
@chapter Distributed Filesystems

Distributed filesystems are designed to share files between separate
machines via a network connection of some sort.  Their design is
significantly different than stored filesystems (@pxref{Stored
Filesystems}): they need to deal with the problems of network delays and
failures, and may require complex authentication and replication
protocols involving multiple file servers.

@menu
* File Transfer Protocol::      A distributed filesystem based on FTP.
* Network File System::         Sun's NFS: a lousy, but common filesystem.
@end menu


@node File Transfer Protocol
@section File Transfer Protocol
@cindex FTP

FIXME: finish

@menu
* FTP Connection Library::      Managing remote FTP server connections.
@end menu

@subsection ftpcp, ftpdir
@subsection ftpfs

@node FTP Connection Library
@subsection FTP Connection Library
@scindex libftpconn
@scindex ftpconn.h

FIXME: finish


@node Network File System
@section Network File System
@cindex NFS

FIXME: finish

@subsection nfsd
@subsection nfs


@node Networking
@chapter Networking

FIXME: this subsystem is in flux @c Thomas, 26-03-1998

@menu
* Socket Interface::            Network communication I/O protocol.
@end menu


@section pfinet
@section pflocal
@section libpipe

@node Socket Interface
@section Socket Interface
@scindex socket.defs

FIXME: net frobbing stuff may be added to socket.defs
@c Thomas, 26-03-1998


@node Terminal Handling
@chapter Terminal Handling

FIXME: finish

@section term
@section term.defs


@node Running Programs
@chapter Running Programs

FIXME: finish

@section ps, w
@section libps
@section exec
@section proc
@section crash


@node Authentication
@chapter Authentication

FIXME: finish

@menu
* Auth Interface::              Auth ports implement the auth interface.
@end menu

@section addauth, rmauth, setauth
@section su, sush, unsu
@section login, loginpr
@section auth

@node Auth Interface
@section Auth Interface
@scindex auth.defs

FIXME: finish

@menu
* Auth Protocol::               Bidirectional authentication.
@end menu

@node Auth Protocol
@subsection Auth Protocol

FIXME: finish


@node Index
@unnumbered Index

@printindex cp

@summarycontents
@contents
@bye