1 module deimos.rados;
2 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
3 // vim: ts=8 sw=2 smarttab
4 /*
5  * Ceph - scalable distributed file system
6  *
7  * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
8  *
9  * This is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License version 2.1, as published by the Free Software
12  * Foundation.  See file COPYING.
13  *
14  */
15 
16 import core.stdc.config;
17 import core.stdc.time;
18 import core.sys.posix.sys.time;
19 import core.sys.posix.sys.types;
20 
21 extern (C):
22 
/*
 * TMAP single-character codes.
 * rados.h and objclass.h define the same values -- keep all copies in sync!
 */
enum char CEPH_OSD_TMAP_HDR = 'h';
enum char CEPH_OSD_TMAP_SET = 's';
enum char CEPH_OSD_TMAP_CREATE = 'c';
enum char CEPH_OSD_TMAP_RM = 'r';
28 
/// Components of the librados version number (major.minor.extra).
enum int LIBRADOS_VER_MAJOR = 0;
enum int LIBRADOS_VER_MINOR = 69;
enum int LIBRADOS_VER_EXTRA = 1;

/**
 * Pack a version triple into a single number: `maj` is shifted left by
 * 16 bits, `min` by 8 bits, and `extra` is added unshifted.
 */
extern (D) auto LIBRADOS_VERSION(T0, T1, T2)(auto ref T0 maj, auto ref T1 min, auto ref T2 extra)
{
    auto majorPart = maj << 16;
    auto minorPart = min << 8;
    return majorPart + minorPart + extra;
}

/// The three LIBRADOS_VER_* components packed with LIBRADOS_VERSION (compile-time constant).
enum LIBRADOS_VERSION_CODE = LIBRADOS_VERSION(LIBRADOS_VER_MAJOR, LIBRADOS_VER_MINOR, LIBRADOS_VER_EXTRA);
39 
enum int LIBRADOS_SUPPORTS_WATCH = 1;

/*
 * RADOS lock flags.
 * cls_lock_types.h defines the same values -- keep them in sync!
 */
enum int LIBRADOS_LOCK_FLAG_RENEW = 0x1;

/*
 * Constants accepted by rados_write_op_create().
 */
enum int LIBRADOS_CREATE_EXCLUSIVE = 1;
enum int LIBRADOS_CREATE_IDEMPOTENT = 0;
52 
53 /*
54  * Flags that can be set on a per-op basis via
55  * rados_read_op_set_flags() and rados_write_op_set_flags().
56  */
enum
{
    /// Fail a create operation if the object already exists.
    LIBRADOS_OP_FLAG_EXCL = 1 << 0,
    /// Allow the transaction to succeed even if the flagged op fails.
    LIBRADOS_OP_FLAG_FAILOK = 1 << 1,
    /// The read/write op is random.
    LIBRADOS_OP_FLAG_FADVISE_RANDOM = 1 << 2,
    /// The read/write op is sequential.
    LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL = 1 << 3,
    /// The data will be accessed in the near future (by someone).
    LIBRADOS_OP_FLAG_FADVISE_WILLNEED = 1 << 4,
    /// The data will not be accessed in the near future (by anyone).
    LIBRADOS_OP_FLAG_FADVISE_DONTNEED = 1 << 5,
    /// The data will not be accessed again (by *this* client).
    LIBRADOS_OP_FLAG_FADVISE_NOCACHE = 1 << 6
}
74 
75 /**
76  * @name xattr comparison operations
77  * Operators for comparing xattrs on objects, and aborting the
78  * rados_read_op or rados_write_op transaction if the comparison
79  * fails.
80  *
81  * @{
82  */
enum
{
    // Only the first member is spelled out; each later member is the
    // previous value plus one.
    LIBRADOS_CMPXATTR_OP_EQ = 1,
    LIBRADOS_CMPXATTR_OP_NE,  // 2
    LIBRADOS_CMPXATTR_OP_GT,  // 3
    LIBRADOS_CMPXATTR_OP_GTE, // 4
    LIBRADOS_CMPXATTR_OP_LT,  // 5
    LIBRADOS_CMPXATTR_OP_LTE  // 6
}
92 
93 /** @} */
94 
95 /**
96  * @name Operation Flags
97  * Flags for rados_read_op_operate(), rados_write_op_operate(),
98  * rados_aio_read_op_operate(), and rados_aio_write_op_operate().
99  * See librados.hpp for details.
100  * @{
101  */
enum
{
    LIBRADOS_OPERATION_NOFLAG = 0,
    LIBRADOS_OPERATION_BALANCE_READS = 1 << 0,
    LIBRADOS_OPERATION_LOCALIZE_READS = 1 << 1,
    LIBRADOS_OPERATION_ORDER_READS_WRITES = 1 << 2,
    LIBRADOS_OPERATION_IGNORE_CACHE = 1 << 3,
    LIBRADOS_OPERATION_SKIPRWLOCKS = 1 << 4,
    LIBRADOS_OPERATION_IGNORE_OVERLAY = 1 << 5,
    /* Send requests to the cluster even though the cluster or pool is
       marked full; ops will either succeed (e.g., delete) or return
       EDQUOT or ENOSPC. */
    LIBRADOS_OPERATION_FULL_TRY = 1 << 6
}
116 
117 /** @} */
118 
119 /**
120  * @name Alloc hint flags
121  * Flags for rados_write_op_alloc_hint2() and rados_set_alloc_hint2()
122  * indicating future IO patterns.
123  * @{
124  */
enum
{
    // One bit per hint, so hints can be OR-ed together.
    LIBRADOS_ALLOC_HINT_SEQUENTIAL_WRITE = 1 << 0,
    LIBRADOS_ALLOC_HINT_RANDOM_WRITE = 1 << 1,
    LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_READ = 1 << 2,
    LIBRADOS_ALLOC_HINT_FLAG_RANDOM_READ = 1 << 3,
    LIBRADOS_ALLOC_HINT_FLAG_APPEND_ONLY = 1 << 4,
    LIBRADOS_ALLOC_HINT_FLAG_IMMUTABLE = 1 << 5,
    LIBRADOS_ALLOC_HINT_FLAG_SHORTLIVED = 1 << 6,
    LIBRADOS_ALLOC_HINT_FLAG_LONGLIVED = 1 << 7,
    LIBRADOS_ALLOC_HINT_FLAG_COMPRESSIBLE = 1 << 8,
    LIBRADOS_ALLOC_HINT_FLAG_INCOMPRESSIBLE = 1 << 9
}
138 
139 /** @} */
140 
/*
 * snap id constants: the two highest values representable in a ulong
 */
enum ulong LIBRADOS_SNAP_HEAD = ulong.max - 1; // same bits as cast(ulong) -2
enum ulong LIBRADOS_SNAP_DIR = ulong.max;      // same bits as cast(ulong) -1
146 
147 /**
148  * @typedef rados_t
149  *
150  * A handle for interacting with a RADOS cluster. It encapsulates all
151  * RADOS client configuration, including username, key for
152  * authentication, logging, and debugging. Talking to different clusters
153  * -- or to the same cluster with different users -- requires
154  * different cluster handles.
155  */
alias rados_t = void*;
157 
158 /**
159  * @typedef rados_config_t
160  *
161  * A handle for the ceph configuration context for the rados_t cluster
162  * instance.  This can be used to share configuration context/state
163  * (e.g., logging configuration) between librados instances.
164  *
165  * @warning The config context does not have independent reference
166  * counting.  As such, a rados_config_t handle retrieved from a given
167  * rados_t is only valid as long as that rados_t.
168  */
alias rados_config_t = void*;
170 
171 /**
172  * @typedef rados_ioctx_t
173  *
174  * An io context encapsulates a few settings for all I/O operations
175  * done on it:
176  * - pool - set when the io context is created (see rados_ioctx_create())
177  * - snapshot context for writes (see
178  *   rados_ioctx_selfmanaged_snap_set_write_ctx())
179  * - snapshot id to read from (see rados_ioctx_snap_set_read())
180  * - object locator for all single-object operations (see
181  *   rados_ioctx_locator_set_key())
182  * - namespace for all single-object operations (see
183  *   rados_ioctx_set_namespace()).  Set to LIBRADOS_ALL_NSPACES
184  *   before rados_nobjects_list_open() will list all objects in all
185  *   namespaces.
186  *
187  * @warning Changing any of these settings is not thread-safe -
188  * librados users must synchronize any of these changes on their own,
189  * or use separate io contexts for each thread
190  */
alias rados_ioctx_t = void*;
192 
193 /**
194  * @typedef rados_list_ctx_t
195  *
196  * An iterator for listing the objects in a pool.
197  * Used with rados_nobjects_list_open(),
198  * rados_nobjects_list_next(), and
199  * rados_nobjects_list_close().
200  */
alias rados_list_ctx_t = void*;
202 
203 /**
204  * @typedef rados_object_list_cursor
205  *
206  * The cursor used with rados_enumerate_objects
207  * and accompanying methods.
208  */
alias rados_object_list_cursor = void*;
210 
/**
 * Three (length, pointer) pairs: object id, namespace and locator.
 *
 * NOTE(review): each *_length presumably gives the byte length of the
 * string stored in the pointer field that follows it; ownership and
 * NUL-termination are not visible here -- confirm against librados.h.
 */
struct rados_object_list_item {
    size_t oid_length;     /// length belonging to `oid`
    char* oid;             /// object id

    size_t nspace_length;  /// length belonging to `nspace`
    char* nspace;          /// namespace

    size_t locator_length; /// length belonging to `locator`
    char* locator;         /// locator
}
222 
223 /**
224  * @typedef rados_snap_t
225  * The id of a snapshot.
226  */
// Must be 64 bits wide on every target: the snap id constants in this module
// (LIBRADOS_SNAP_HEAD, LIBRADOS_SNAP_DIR) are ulong values, whereas c_ulong
// is only 32 bits wide on ILP32 and Windows targets. On LP64 targets c_ulong
// already is ulong, so existing callers there are unaffected.
alias ulong rados_snap_t;
228 
229 /**
230  * @typedef rados_xattrs_iter_t
231  * An iterator for listing extended attributes on an object.
232  * Used with rados_getxattrs(), rados_getxattrs_next(), and
233  * rados_getxattrs_end().
234  */
alias rados_xattrs_iter_t = void*;
236 
237 /**
238  * @typedef rados_omap_iter_t
239  * An iterator for listing omap key/value pairs on an object.
240  * Used with rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals(),
241  * rados_read_op_omap_get_vals_by_keys(), rados_omap_get_next(), and
242  * rados_omap_get_end().
243  */
alias rados_omap_iter_t = void*;
245 
246 /**
247  * @struct rados_pool_stat_t
248  * Usage information for a pool.
249  */
struct rados_pool_stat_t {
    ulong num_bytes;          /// space used in bytes
    ulong num_kb;             /// space used in KB
    ulong num_objects;        /// number of objects in the pool
    ulong num_object_clones;  /// number of clones of objects
    ulong num_object_copies;  /// num_objects * num_replicas
    // NOTE(review): undocumented upstream; going by the name, objects
    // missing on their primary -- confirm against librados.h.
    ulong num_objects_missing_on_primary;
    ulong num_objects_unfound; /// number of objects found on no OSDs
    /// number of objects replicated fewer times than they should be
    /// (but found on at least one OSD)
    ulong num_objects_degraded;
    // NOTE(review): undocumented upstream; presumably read/write operation
    // counts and the KB read/written -- confirm against librados.h.
    // Declared together; field order (and therefore layout) is unchanged.
    ulong num_rd, num_rd_kb, num_wr, num_wr_kb;
}
273 
274 /**
275  * @struct rados_cluster_stat_t
276  * Cluster-wide usage information
277  */
struct rados_cluster_stat_t {
    // Four consecutive 64-bit counters, declared in layout order.
    // NOTE(review): presumably total / used / available space in KB plus
    // the object count -- confirm against librados.h.
    ulong kb, kb_used, kb_avail, num_objects;
}
285 
286 /**
287  * @typedef rados_write_op_t
288  *
289  * An object write operation stores a number of operations which can be
290  * executed atomically. For usage, see:
291  * - Creation and deletion: rados_create_write_op() rados_release_write_op()
292  * - Extended attribute manipulation: rados_write_op_cmpxattr(),
293  *   rados_write_op_setxattr(),
294  *   rados_write_op_rmxattr()
295  * - Object map key/value pairs: rados_write_op_omap_set(),
296  *   rados_write_op_omap_rm_keys(), rados_write_op_omap_clear(),
297  *   rados_write_op_omap_cmp()
298  * - Object properties: rados_write_op_assert_exists(),
299  *   rados_write_op_assert_version()
300  * - Creating objects: rados_write_op_create()
301  * - IO on objects: rados_write_op_append(), rados_write_op_write(),
302  *   rados_write_op_write_full(), rados_write_op_writesame(), rados_write_op_remove(),
303  *   rados_write_op_truncate(), rados_write_op_zero()
304  * - Hints: rados_write_op_set_alloc_hint()
305  * - Performing the operation: rados_write_op_operate(), rados_aio_write_op_operate()
306  */
alias rados_write_op_t = void*;
308 
309 /**
310  * @typedef rados_read_op_t
311  *
312  * An object read operation stores a number of operations which can be
313  * executed atomically. For usage, see:
314  * - Creation and deletion: rados_create_read_op() rados_release_read_op()
315  * - Extended attribute manipulation: rados_read_op_cmpxattr(),
316  *   rados_read_op_getxattr(), rados_read_op_getxattrs()
317  * - Object map key/value pairs: rados_read_op_omap_get_vals(),
318  *   rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals_by_keys(),
319  *   rados_read_op_omap_cmp()
320  * - Object properties: rados_read_op_stat(), rados_read_op_assert_exists(),
321  *   rados_read_op_assert_version()
322  * - IO on objects: rados_read_op_read()
323  * - Custom operations: rados_read_op_exec(), rados_read_op_exec_user_buf()
324  * - Request properties: rados_read_op_set_flags()
325  * - Performing the operation: rados_read_op_operate(),
326  *   rados_aio_read_op_operate()
327  */
alias rados_read_op_t = void*;
329 
330 /**
331  * Get the version of librados.
332  *
333  * The version number is major.minor.extra. Note that this is
334  * unrelated to the Ceph version number.
335  *
336  * TODO: define version semantics, i.e.:
337  * - incrementing major is for backwards-incompatible changes
338  * - incrementing minor is for backwards-compatible changes
339  * - incrementing extra is for bug fixes
340  *
341  * @param major where to store the major version number
342  * @param minor where to store the minor version number
343  * @param extra where to store the extra version number
344  */
345 void rados_version (int* major, int* minor, int* extra);
346 
347 /**
348  * @name Setup and Teardown
349  * These are the first and last functions that should be called
350  * when using librados.
351  *
352  * @{
353  */
354 
355 /**
356  * Create a handle for communicating with a RADOS cluster.
357  *
358  * Ceph environment variables are read when this is called, so if
359  * $CEPH_ARGS specifies everything you need to connect, no further
360  * configuration is necessary.
361  *
362  * @param cluster where to store the handle
363  * @param id the user to connect as (i.e. admin, not client.admin)
364  * @returns 0 on success, negative error code on failure
365  */
// `id` is spelled const(char)* like every other string parameter in this
// module; any argument the previous `const char*` spelling accepted is still
// accepted.
int rados_create (rados_t* cluster, const(char)* id);
367 
/**
 * Extended version of rados_create.
 *
 * Like rados_create, but
 * 1) don't assume 'client\.'+id; allow full specification of name
 * 2) allow specification of cluster name
 * 3) flags for future expansion
 *
 * @param pcluster where to store the handle
 * @param clustername name of the cluster
 * @param name full name to connect as (no 'client.' prefix is assumed)
 * @param flags reserved for future expansion
 * @returns presumably 0 on success and a negative error code on failure,
 * like rados_create() -- confirm against librados.h
 */
int rados_create2 (
    rados_t* pcluster,
    const(char)* clustername,
    const(char)* name,
    ulong flags);
381 
382 /**
383  * Initialize a cluster handle from an existing configuration.
384  *
385  * Share configuration state with another rados_t instance.
386  *
387  * @param cluster where to store the handle
388  * @param cct the existing configuration to use
389  * @returns 0 on success, negative error code on failure
390  */
391 int rados_create_with_context (rados_t* cluster, rados_config_t cct);
392 
393 /**
394  * Ping the monitor with ID mon_id, storing the resulting reply in
395  * buf (if specified) with a maximum size of len.
396  *
397  * The result buffer is allocated on the heap; the caller is
398  * expected to release that memory with rados_buffer_free().  The
399  * buffer and length pointers can be NULL, in which case they are
400  * not filled in.
401  *
402  * @param      cluster    cluster handle
403  * @param[in]  mon_id     ID of the monitor to ping
404  * @param[out] outstr     double pointer with the resulting reply
405  * @param[out] outstrlen  pointer with the size of the reply in outstr
406  */
407 int rados_ping_monitor (
408     rados_t cluster,
409     const(char)* mon_id,
410     char** outstr,
411     size_t* outstrlen);
412 
413 /**
414  * Connect to the cluster.
415  *
416  * @note BUG: Before calling this, calling a function that communicates with the
417  * cluster will crash.
418  *
419  * @pre The cluster handle is configured with at least a monitor
420  * address. If cephx is enabled, a client name and secret must also be
421  * set.
422  *
423  * @post If this succeeds, any function in librados may be used
424  *
425  * @param cluster The cluster to connect to.
426  * @returns 0 on success, negative error code on failure
427  */
428 int rados_connect (rados_t cluster);
429 
430 /**
431  * Disconnects from the cluster.
432  *
433  * For clean up, this is only necessary after rados_connect() has
434  * succeeded.
435  *
436  * @warning This does not guarantee any asynchronous writes have
437  * completed. To do that, you must call rados_aio_flush() on all open
438  * io contexts.
439  *
440  * @warning We implicitly call rados_watch_flush() on shutdown.  If
441  * there are watches being used, this should be done explicitly before
442  * destroying the relevant IoCtx.  We do it here as a safety measure.
443  *
444  * @post the cluster handle cannot be used again
445  *
446  * @param cluster the cluster to shutdown
447  */
448 void rados_shutdown (rados_t cluster);
449 
450 /** @} init */
451 
452 /**
453  * @name Configuration
454  * These functions read and update Ceph configuration for a cluster
455  * handle. Any configuration changes must be done before connecting to
456  * the cluster.
457  *
458  * Options that librados users might want to set include:
459  * - mon_host
460  * - auth_supported
461  * - key, keyfile, or keyring when using cephx
462  * - log_file, log_to_stderr, err_to_stderr, and log_to_syslog
463  * - debug_rados, debug_objecter, debug_monc, debug_auth, or debug_ms
464  *
465  * All possible options can be found in src/common/config_opts.h in ceph.git
466  *
467  * @{
468  */
469 
470 /**
471  * Configure the cluster handle using a Ceph config file
472  *
473  * If path is NULL, the default locations are searched, and the first
474  * found is used. The locations are:
475  * - $CEPH_CONF (environment variable)
476  * - /etc/ceph/ceph.conf
477  * - ~/.ceph/config
478  * - ceph.conf (in the current working directory)
479  *
480  * @pre rados_connect() has not been called on the cluster handle
481  *
482  * @param cluster cluster handle to configure
483  * @param path path to a Ceph configuration file
484  * @returns 0 on success, negative error code on failure
485  */
486 int rados_conf_read_file (rados_t cluster, const(char)* path);
487 
488 /**
489  * Configure the cluster handle with command line arguments
490  *
491  * argv can contain any common Ceph command line option, including any
492  * configuration parameter prefixed by '--' and replacing spaces with
493  * dashes or underscores. For example, the following options are equivalent:
494  * - --mon-host 10.0.0.1:6789
495  * - --mon_host 10.0.0.1:6789
496  * - -m 10.0.0.1:6789
497  *
498  * @pre rados_connect() has not been called on the cluster handle
499  *
500  * @param cluster cluster handle to configure
501  * @param argc number of arguments in argv
502  * @param argv arguments to parse
503  * @returns 0 on success, negative error code on failure
504  */
505 int rados_conf_parse_argv (rados_t cluster, int argc, const(char*)* argv);
506 
507 /**
508  * Configure the cluster handle with command line arguments, returning
509  * any remainders.  Same as rados_conf_parse_argv, except for the extra
510  * remargv argument, which holds the returned unrecognized arguments.
511  *
512  * @pre rados_connect() has not been called on the cluster handle
513  *
514  * @param cluster cluster handle to configure
515  * @param argc number of arguments in argv
516  * @param argv arguments to parse
517  * @param remargv char* array for returned unrecognized arguments
518  * @returns 0 on success, negative error code on failure
519  */
520 int rados_conf_parse_argv_remainder (
521     rados_t cluster,
522     int argc,
523     const(char*)* argv,
524     const(char*)* remargv);
525 
526 /**
527  * Configure the cluster handle based on an environment variable
528  *
529  * The contents of the environment variable are parsed as if they were
530  * Ceph command line options. If var is NULL, the CEPH_ARGS
531  * environment variable is used.
532  *
533  * @pre rados_connect() has not been called on the cluster handle
534  *
535  * @note BUG: this is not threadsafe - it uses a static buffer
536  *
537  * @param cluster cluster handle to configure
538  * @param var name of the environment variable to read
539  * @returns 0 on success, negative error code on failure
540  */
541 int rados_conf_parse_env (rados_t cluster, const(char)* var);
542 
543 /**
544  * Set a configuration option
545  *
546  * @pre rados_connect() has not been called on the cluster handle
547  *
548  * @param cluster cluster handle to configure
549  * @param option option to set
550  * @param value value of the option
551  * @returns 0 on success, negative error code on failure
552  * @returns -ENOENT when the option is not a Ceph configuration option
553  */
554 int rados_conf_set (rados_t cluster, const(char)* option, const(char)* value);
555 
556 /**
557  * Get the value of a configuration option
558  *
559  * @param cluster configuration to read
560  * @param option which option to read
561  * @param buf where to write the configuration value
562  * @param len the size of buf in bytes
563  * @returns 0 on success, negative error code on failure
564  * @returns -ENAMETOOLONG if the buffer is too short to contain the
565  * requested value
566  */
567 int rados_conf_get (
568     rados_t cluster,
569     const(char)* option,
570     char* buf,
571     size_t len);
572 
573 /** @} config */
574 
575 /**
576  * Read usage info about the cluster
577  *
578  * This tells you total space, space used, space available, and number
579  * of objects. These are not updated immediately when data is written,
580  * they are eventually consistent.
581  *
582  * @param cluster cluster to query
583  * @param result where to store the results
584  * @returns 0 on success, negative error code on failure
585  */
586 int rados_cluster_stat (rados_t cluster, rados_cluster_stat_t* result);
587 
588 /**
589  * Get the fsid of the cluster as a hexadecimal string.
590  *
591  * The fsid is a unique id of an entire Ceph cluster.
592  *
593  * @param cluster where to get the fsid
594  * @param buf where to write the fsid
595  * @param len the size of buf in bytes (should be 37)
596  * @returns 0 on success, negative error code on failure
597  * @returns -ERANGE if the buffer is too short to contain the
598  * fsid
599  */
600 int rados_cluster_fsid (rados_t cluster, char* buf, size_t len);
601 
602 /**
603  * Get/wait for the most recent osdmap
604  *
605  * @param cluster cluster handle
606  * @returns 0 on success, negative error code on failure
607  */
608 int rados_wait_for_latest_osdmap (rados_t cluster);
609 
610 /**
611  * @name Pools
612  *
613  * RADOS pools are separate namespaces for objects. Pools may have
614  * different crush rules associated with them, so they could have
615  * differing replication levels or placement strategies. RADOS
616  * permissions are also tied to pools - users can have different read,
617  * write, and execute permissions on a per-pool basis.
618  *
619  * @{
620  */
621 
622 /**
623  * List pools
624  *
625  * Gets a list of pool names as NULL-terminated strings.  The pool
626  * names will be placed in the supplied buffer one after another.
627  * After the last pool name, there will be two 0 bytes in a row.
628  *
629  * If len is too short to fit all the pool name entries we need, we will fill
630  * as much as we can.
631  *
632  * @param cluster cluster handle
633  * @param buf output buffer
634  * @param len output buffer length
635  * @returns length of the buffer we would need to list all pools
636  */
637 int rados_pool_list (rados_t cluster, char* buf, size_t len);
638 
639 /**
640  * List inconsistent placement groups of the given pool
641  *
642  * Gets a list of inconsistent placement groups as NULL-terminated strings.
643  * The placement group names will be placed in the supplied buffer one after
644  * another. After the last name, there will be two 0 bytes in a row.
645  *
646  * If len is too short to fit all the placement group entries we need, we  will
647  * fill as much as we can.
648  *
649  * @param cluster cluster handle
650  * @param pool pool ID
651  * @param buf output buffer
652  * @param len output buffer length
653  * @returns length of the buffer we would need to list all inconsistent placement groups
654  */
655 int rados_inconsistent_pg_list (
656     rados_t cluster,
657     long pool,
658     char* buf,
659     size_t len);
660 
661 /**
662  * Get a configuration handle for a rados cluster handle
663  *
664  * This handle is valid only as long as the cluster handle is valid.
665  *
666  * @param cluster cluster handle
667  * @returns config handle for this cluster
668  */
669 rados_config_t rados_cct (rados_t cluster);
670 
671 /**
672  * Get a global id for current instance
673  *
674  * This id is a unique representation of current connection to the cluster
675  *
676  * @param cluster cluster handle
677  * @returns instance global id
678  */
679 ulong rados_get_instance_id (rados_t cluster);
680 
681 /**
682  * Create an io context
683  *
684  * The io context allows you to perform operations within a particular
685  * pool. For more details see rados_ioctx_t.
686  *
687  * @param cluster which cluster the pool is in
688  * @param pool_name name of the pool
689  * @param ioctx where to store the io context
690  * @returns 0 on success, negative error code on failure
691  */
692 int rados_ioctx_create (
693     rados_t cluster,
694     const(char)* pool_name,
695     rados_ioctx_t* ioctx);
/**
 * Create an io context, identifying the pool by numeric id instead of by name.
 *
 * NOTE(review): presumably otherwise equivalent to rados_ioctx_create(),
 * including the return convention -- confirm against librados.h.
 *
 * @param cluster which cluster the pool is in
 * @param pool_id id of the pool
 * @param ioctx where to store the io context
 */
int rados_ioctx_create2 (
    rados_t cluster,
    long pool_id,
    rados_ioctx_t* ioctx);
697 
698 /**
699  * The opposite of rados_ioctx_create
700  *
701  * This just tells librados that you no longer need to use the io context.
702  * It may not be freed immediately if there are pending asynchronous
703  * requests on it, but you should not use an io context again after
704  * calling this function on it.
705  *
706  * @warning This does not guarantee any asynchronous
707  * writes have completed. You must call rados_aio_flush()
708  * on the io context before destroying it to do that.
709  *
710  * @warning If this ioctx is used by rados_watch, the caller needs to
711  * be sure that all registered watches are disconnected via
712  * rados_unwatch() and that rados_watch_flush() is called.  This
713  * ensures that a racing watch callback does not make use of a
714  * destroyed ioctx.
715  *
716  * @param io the io context to dispose of
717  */
718 void rados_ioctx_destroy (rados_ioctx_t io);
719 
720 /**
721  * Get configuration handle for a pool handle
722  *
723  * @param io pool handle
724  * @returns rados_config_t for this cluster
725  */
726 rados_config_t rados_ioctx_cct (rados_ioctx_t io);
727 
728 /**
729  * Get the cluster handle used by this rados_ioctx_t
730  * Note that this is a weak reference, and should not
731  * be destroyed via rados_shutdown().
732  *
733  * @param io the io context
734  * @returns the cluster handle for this io context
735  */
736 rados_t rados_ioctx_get_cluster (rados_ioctx_t io);
737 
738 /**
739  * Get pool usage statistics
740  *
741  * Fills in a rados_pool_stat_t after querying the cluster.
742  *
743  * @param io determines which pool to query
744  * @param stats where to store the results
745  * @returns 0 on success, negative error code on failure
746  */
747 int rados_ioctx_pool_stat (rados_ioctx_t io, rados_pool_stat_t* stats);
748 
749 /**
750  * Get the id of a pool
751  *
752  * @param cluster which cluster the pool is in
753  * @param pool_name which pool to look up
754  * @returns id of the pool
755  * @returns -ENOENT if the pool is not found
756  */
757 long rados_pool_lookup (rados_t cluster, const(char)* pool_name);
758 
759 /**
760  * Get the name of a pool
761  *
762  * @param cluster which cluster the pool is in
763  * @param id the id of the pool
764  * @param buf where to store the pool name
765  * @param maxlen size of buffer where name will be stored
766  * @returns length of string stored, or -ERANGE if buffer too small
767  */
768 int rados_pool_reverse_lookup (
769     rados_t cluster,
770     long id,
771     char* buf,
772     size_t maxlen);
773 
774 /**
775  * Create a pool with default settings
776  *
777  * The default owner is the admin user (auid 0).
778  * The default crush rule is rule 0.
779  *
780  * @param cluster the cluster in which the pool will be created
781  * @param pool_name the name of the new pool
782  * @returns 0 on success, negative error code on failure
783  */
784 int rados_pool_create (rados_t cluster, const(char)* pool_name);
785 
786 /**
787  * Create a pool owned by a specific auid
788  *
789  * The auid is the authenticated user id to give ownership of the pool.
790  * TODO: document auid and the rest of the auth system
791  *
792  * @param cluster the cluster in which the pool will be created
793  * @param pool_name the name of the new pool
794  * @param auid the id of the owner of the new pool
795  * @returns 0 on success, negative error code on failure
796  */
797 int rados_pool_create_with_auid (
798     rados_t cluster,
799     const(char)* pool_name,
800     ulong auid);
801 
802 /**
803  * Create a pool with a specific CRUSH rule
804  *
805  * @param cluster the cluster in which the pool will be created
806  * @param pool_name the name of the new pool
807  * @param crush_rule_num which rule to use for placement in the new pool
808  * @returns 0 on success, negative error code on failure
809  */
810 int rados_pool_create_with_crush_rule (
811     rados_t cluster,
812     const(char)* pool_name,
813     ubyte crush_rule_num);
814 
815 /**
816  * Create a pool with a specific CRUSH rule and auid
817  *
818  * This is a combination of rados_pool_create_with_crush_rule() and
819  * rados_pool_create_with_auid().
820  *
821  * @param cluster the cluster in which the pool will be created
822  * @param pool_name the name of the new pool
823  * @param crush_rule_num which rule to use for placement in the new pool
824  * @param auid the id of the owner of the new pool
825  * @returns 0 on success, negative error code on failure
826  */
827 int rados_pool_create_with_all (
828     rados_t cluster,
829     const(char)* pool_name,
830     ulong auid,
831     ubyte crush_rule_num);
832 
833 /**
834  * Returns the pool that is the base tier for this pool.
835  *
836  * The return value is the ID of the pool that should be used to read from/write to.
837  * If tiering is not set up for the pool, returns \c pool.
838  *
839  * @param cluster the cluster the pool is in
840  * @param pool ID of the pool to query
841  * @param[out] base_tier base tier, or \c pool if tiering is not configured
842  * @returns 0 on success, negative error code on failure
843  */
844 int rados_pool_get_base_tier (rados_t cluster, long pool, long* base_tier);
845 
846 /**
847  * Delete a pool and all data inside it
848  *
849  * The pool is removed from the cluster immediately,
850  * but the actual data is deleted in the background.
851  *
852  * @param cluster the cluster the pool is in
853  * @param pool_name which pool to delete
854  * @returns 0 on success, negative error code on failure
855  */
856 int rados_pool_delete (rados_t cluster, const(char)* pool_name);
857 
858 /**
859  * Attempt to change an io context's associated auid "owner"
860  *
861  * Requires that you have write permission on both the current and new
862  * auid.
863  *
864  * @param io reference to the pool to change.
865  * @param auid the auid you wish the io to have.
866  * @returns 0 on success, negative error code on failure
867  */
868 int rados_ioctx_pool_set_auid (rados_ioctx_t io, ulong auid);
869 
870 /**
871  * Get the auid of a pool
872  *
873  * @param io pool to query
874  * @param auid where to store the auid
875  * @returns 0 on success, negative error code on failure
876  */
877 int rados_ioctx_pool_get_auid (rados_ioctx_t io, ulong* auid);
878 
879 /* deprecated, use rados_ioctx_pool_requires_alignment2 instead */
880 int rados_ioctx_pool_requires_alignment (rados_ioctx_t io);
881 
882 /**
883  * Test whether the specified pool requires alignment or not.
884  *
885  * @param io pool to query
886  * @param requires 1 if alignment is supported, 0 if not.
887  * @returns 0 on success, negative error code on failure
888  */
889 int rados_ioctx_pool_requires_alignment2 (rados_ioctx_t io, int* requires);
890 
891 /* deprecated, use rados_ioctx_pool_required_alignment2 instead */
892 ulong rados_ioctx_pool_required_alignment (rados_ioctx_t io);
893 
894 /**
895  * Get the alignment flavor of a pool
896  *
897  * @param io pool to query
898  * @param alignment where to store the alignment flavor
899  * @returns 0 on success, negative error code on failure
900  */
901 int rados_ioctx_pool_required_alignment2 (rados_ioctx_t io, ulong* alignment);
902 
903 /**
904  * Get the pool id of the io context
905  *
906  * @param io the io context to query
907  * @returns the id of the pool the io context uses
908  */
909 long rados_ioctx_get_id (rados_ioctx_t io);
910 
911 /**
912  * Get the pool name of the io context
913  *
914  * @param io the io context to query
915  * @param buf pointer to buffer where name will be stored
916  * @param maxlen size of buffer where name will be stored
917  * @returns length of string stored, or -ERANGE if buffer too small
918  */
919 int rados_ioctx_get_pool_name (rados_ioctx_t io, char* buf, uint maxlen);
920 
921 /** @} pools */
922 
923 /**
924  * @name Object Locators
925  *
926  * @{
927  */
928 
/**
 * Set the locator key used to map objects to pgs within an io context.
 *
 * When a key is set, it replaces the object name as the input that
 * decides which placement group an object lands in. The setting applies
 * to every later operation on the io context: until another locator key
 * is set, all objects in this io context are placed in the same pg.
 *
 * This matters for clone_range operations, which require the source and
 * destination objects to live in the same pg.
 *
 * @param io the io context to change
 * @param key the key to use as the object locator, or NULL to discard
 * any previously set key
 */
void rados_ioctx_locator_set_key(
    rados_ioctx_t io,
    const(char)* key);
945 
/**
 * Select the object namespace used by an io context.
 *
 * A namespace subdivides a pool into separate domains. The namespace
 * value also takes part in the mapping of objects to pgs.
 *
 * @param io the io context to change
 * @param nspace the name to use as the namespace, or NULL to use the
 * default namespace
 */
void rados_ioctx_set_namespace(
    rados_ioctx_t io,
    const(char)* nspace);
958 
/** @} Object Locators */
960 
961 /**
962  * @name New Listing Objects
963  * @{
964  */
/**
 * Begin listing the objects in a pool.
 *
 * @param io the pool to list from
 * @param ctx output: handle in which the list context is stored
 * @returns 0 on success, negative error code on failure
 */
int rados_nobjects_list_open(
    rados_ioctx_t io,
    rados_list_ctx_t* ctx);
973 
/**
 * Report the iterator's hash position, rounded to the current PG.
 *
 * @param ctx iterator marking the current position in the listing
 * @returns current hash position, rounded to the current pg
 */
uint rados_nobjects_list_get_pg_hash_position(rados_list_ctx_t ctx);
981 
/**
 * Move the object iterator to a different hash position.
 *
 * @param ctx iterator marking the current position in the listing
 * @param pos hash position to move to
 * @returns the actual (rounded) position the iterator was moved to
 */
uint rados_nobjects_list_seek(
    rados_list_ctx_t ctx,
    uint pos);
990 
/**
 * Fetch the next object name and locator in the pool.
 *
 * *entry and *key stay valid until the next call to
 * rados_nobjects_list_*.
 *
 * NOTE(review): entry, key and nspace are documented as outputs, yet
 * they are typed const(char*)* (pointer to a const pointer) — confirm
 * whether const(char)** was intended.
 *
 * @param ctx iterator marking the current position in the listing
 * @param entry where to store the name of the entry
 * @param key where to store the object locator (set to NULL to ignore)
 * @param nspace where to store the object namespace (set to NULL to
 * ignore)
 * @returns 0 on success, negative error code on failure
 * @returns -ENOENT when there are no more objects to list
 */
int rados_nobjects_list_next(
    rados_list_ctx_t ctx, const(char*)* entry,
    const(char*)* key, const(char*)* nspace);
1008 
/**
 * Release an object listing handle.
 *
 * Call this once the handle is no longer needed. The handle must not
 * be used again after it has been closed.
 *
 * @param ctx the handle to close
 */
void rados_nobjects_list_close(rados_list_ctx_t ctx);
1018 
/*
 * NOTE(review): undocumented in this binding. Judging by the name, this
 * returns the cursor for the start of the object listing of `io` —
 * confirm against the librados headers.
 */
rados_object_list_cursor rados_object_list_begin(rados_ioctx_t io);
/*
 * NOTE(review): undocumented in this binding. Judging by the name, this
 * returns the cursor for the end of the object listing of `io` —
 * confirm against the librados headers.
 */
rados_object_list_cursor rados_object_list_end(rados_ioctx_t io);
1021 
/*
 * NOTE(review): undocumented in this binding. Presumably tests whether
 * `cur` is the end-of-listing cursor for `io` — confirm the meaning of
 * the int result against the librados headers.
 */
int rados_object_list_is_end(
    rados_ioctx_t io,
    rados_object_list_cursor cur);
1023 
/*
 * NOTE(review): undocumented in this binding. Presumably releases a
 * cursor obtained from the rados_object_list_* functions — confirm
 * against the librados headers.
 */
void rados_object_list_cursor_free(rados_ioctx_t io, rados_object_list_cursor cur);
1027 
/*
 * NOTE(review): undocumented in this binding. Presumably compares two
 * listing cursors — the sign convention of the result should be
 * confirmed against the librados headers.
 */
int rados_object_list_cursor_cmp(
    rados_ioctx_t io,
    rados_object_list_cursor lhs, rados_object_list_cursor rhs);
1032 
/**
 * Cursor-based object listing call.
 *
 * NOTE(review): only the return value is documented in this binding;
 * the roles of the individual parameters should be confirmed against
 * the librados headers.
 *
 * @return the number of items set in the result array
 */
int rados_object_list(
    rados_ioctx_t io,
    const rados_object_list_cursor start, const rados_object_list_cursor finish,
    const size_t result_size,
    const(char)* filter_buf, const size_t filter_buf_len,
    rados_object_list_item* results,
    rados_object_list_cursor* next);
1045 
/*
 * NOTE(review): undocumented in this binding. Presumably releases the
 * `results` array filled in by rados_object_list() — confirm against
 * the librados headers.
 */
void rados_object_list_free(const size_t result_size, rados_object_list_item* results);
1049 
/**
 * Compute cursors that delimit one slice of a cursor range. This is
 * meant for splitting the work of iterating over the global namespace:
 * the expected use case is parallel iteration with `m` workers, where
 * each worker takes an id `n`.
 *
 * Note that `n` precedes `m` in the parameter list.
 *
 * @param start start of the range to be sliced up (inclusive)
 * @param finish end of the range to be sliced up (exclusive)
 * @param n which of the m chunks you would like to get cursors for
 * @param m how many chunks to divide start-finish into
 * @param split_start cursor populated with start of the subrange
 * (inclusive)
 * @param split_finish cursor populated with end of the subrange
 * (exclusive)
 */
void rados_object_list_slice(
    rados_ioctx_t io,
    const rados_object_list_cursor start, const rados_object_list_cursor finish,
    const size_t n, const size_t m,
    rados_object_list_cursor* split_start,
    rados_object_list_cursor* split_finish);
1071 
1072 /** @} New Listing Objects */
1073 
1074 /**
1075  * @name Deprecated Listing Objects
1076  *
1077  * Older listing objects interface.  Please use the new interface.
1078  * @{
1079  */
/**
 * @warning Deprecated: rados_nobjects_list_open() is the replacement
 */
int rados_objects_list_open(
    rados_ioctx_t io,
    rados_list_ctx_t* ctx);
1084 
/**
 * @warning Deprecated: rados_nobjects_list_get_pg_hash_position() is the
 * replacement
 */
uint rados_objects_list_get_pg_hash_position(rados_list_ctx_t ctx);
1089 
/**
 * @warning Deprecated: rados_nobjects_list_seek() is the replacement
 */
uint rados_objects_list_seek(
    rados_list_ctx_t ctx,
    uint pos);
1094 
/**
 * @warning Deprecated: rados_nobjects_list_next() is the replacement
 */
int rados_objects_list_next(rados_list_ctx_t ctx, const(char*)* entry, const(char*)* key);
1102 
/**
 * @warning Deprecated: rados_nobjects_list_close() is the replacement
 */
void rados_objects_list_close(rados_list_ctx_t ctx);
1107 
/** @} Deprecated Listing Objects */
1109 
1110 /**
1111  * @name Snapshots
1112  *
1113  * RADOS snapshots are based upon sequence numbers that form a
1114  * snapshot context. They are pool-specific. The snapshot context
1115  * consists of the current snapshot sequence number for a pool, and an
1116  * array of sequence numbers at which snapshots were taken, in
1117  * descending order. Whenever a snapshot is created or deleted, the
1118  * snapshot sequence number for the pool is increased. To add a new
1119  * snapshot, the new snapshot sequence number must be increased and
1120  * added to the snapshot context.
1121  *
1122  * There are two ways to manage these snapshot contexts:
1123  * -# within the RADOS cluster
1124  *    These are called pool snapshots, and store the snapshot context
1125  *    in the OSDMap. These represent a snapshot of all the objects in
1126  *    a pool.
1127  * -# within the RADOS clients
1128  *    These are called self-managed snapshots, and push the
1129  *    responsibility for keeping track of the snapshot context to the
1130  *    clients. For every write, the client must send the snapshot
1131  *    context. In librados, this is accomplished with
1132  *    rados_selfmanaged_snap_set_write_ctx(). These are more
1133  *    difficult to manage, but are restricted to specific objects
1134  *    instead of applying to an entire pool.
1135  *
1136  * @{
1137  */
1138 
/**
 * Take a snapshot of an entire pool.
 *
 * @param io the pool to snapshot
 * @param snapname the name to give the snapshot
 * @returns 0 on success, negative error code on failure
 */
int rados_ioctx_snap_create(
    rados_ioctx_t io,
    const(char)* snapname);
1147 
/**
 * Remove a pool snapshot.
 *
 * @param io the pool to delete the snapshot from
 * @param snapname name of the snapshot to delete
 * @returns 0 on success, negative error code on failure
 */
int rados_ioctx_snap_remove(
    rados_ioctx_t io,
    const(char)* snapname);
1156 
/**
 * Roll an object back to a pool snapshot.
 *
 * Afterwards the object's contents are the same as they were when the
 * snapshot was taken.
 *
 * @param io the pool in which the object is stored
 * @param oid the name of the object to roll back
 * @param snapname the snapshot to roll back to
 * @returns 0 on success, negative error code on failure
 */
int rados_ioctx_snap_rollback(rados_ioctx_t io, const(char)* oid, const(char)* snapname);
1172 
/**
 * @warning Deprecated: rados_ioctx_snap_rollback() is the replacement
 */
int rados_rollback(
    rados_ioctx_t io,
    const(char)* oid,
    const(char)* snapname);
1177 
/**
 * Choose the snapshot that reads are served from.
 *
 * Reads issued afterwards return the data as it was at the time of
 * that snapshot.
 *
 * @param io the io context to change
 * @param snap the id of the snapshot to set, or LIBRADOS_SNAP_HEAD for
 * no snapshot (i.e. normal operation)
 */
void rados_ioctx_snap_set_read(
    rados_ioctx_t io,
    rados_snap_t snap);
1189 
/**
 * Allocate an ID for a self-managed snapshot.
 *
 * Yields a unique ID to put in the snapshot context in order to create
 * a snapshot. No clone of an object is created until a write with the
 * new snapshot context has completed.
 *
 * @param io the pool in which the snapshot will exist
 * @param snapid output: where the newly allocated snapshot ID is stored
 * @returns 0 on success, negative error code on failure
 */
int rados_ioctx_selfmanaged_snap_create(rados_ioctx_t io, rados_snap_t* snapid);
1204 
/**
 * Remove a self-managed snapshot.
 *
 * This increases the snapshot sequence number, which causes snapshots
 * to be removed lazily.
 *
 * @param io the pool the snapshot belongs to
 * @param snapid ID of the self-managed snapshot to remove (passed by
 * value; nothing is written back through it)
 * @returns 0 on success, negative error code on failure
 */
int rados_ioctx_selfmanaged_snap_remove(
    rados_ioctx_t io,
    rados_snap_t snapid);
1216 
/**
 * Roll an object back to a self-managed snapshot.
 *
 * Afterwards the object's contents are the same as they were when the
 * snapshot was taken.
 *
 * @param io the pool in which the object is stored
 * @param oid the name of the object to roll back
 * @param snapid the snapshot to roll back to
 * @returns 0 on success, negative error code on failure
 */
int rados_ioctx_selfmanaged_snap_rollback(rados_ioctx_t io, const(char)* oid, rados_snap_t snapid);
1232 
/**
 * Install the snapshot context used when writing to objects.
 *
 * The context is stored in the io context and applies to all future
 * writes.
 *
 * @param io the io context to change
 * @param seq the newest snapshot sequence number for the pool
 * @param snaps array of snapshots, sorted by descending id
 * @param num_snaps how many snapshots are in the snaps array
 * @returns 0 on success, negative error code on failure
 * @returns -EINVAL if snaps are not in descending order
 */
int rados_ioctx_selfmanaged_snap_set_write_ctx(
    rados_ioctx_t io, rados_snap_t seq,
    rados_snap_t* snaps, int num_snaps);
1250 
/**
 * Enumerate the ids of all pool snapshots.
 *
 * When the output array is too small to hold every snapshot, -ERANGE
 * is returned and the caller should retry with a larger array.
 *
 * @param io the pool to read from
 * @param snaps where to store the results
 * @param maxlen the number of rados_snap_t that fit in the snaps array
 * @returns number of snapshots on success, negative error code on
 * failure
 * @returns -ERANGE is returned if the snaps array is too short
 */
int rados_ioctx_snap_list(
    rados_ioctx_t io,
    rados_snap_t* snaps,
    int maxlen);
1265 
/**
 * Resolve a pool snapshot name to its id.
 *
 * @param io the pool to read from
 * @param name the snapshot to find
 * @param id output: where the result is stored
 * @returns 0 on success, negative error code on failure
 */
int rados_ioctx_snap_lookup(rados_ioctx_t io, const(char)* name, rados_snap_t* id);
1278 
/**
 * Resolve a pool snapshot id to its name.
 *
 * @param io the pool to read from
 * @param id the snapshot to find
 * @param name output: where the result is stored
 * @param maxlen the size of the name array
 * @returns 0 on success, negative error code on failure
 * @returns -ERANGE if the name array is too small
 */
int rados_ioctx_snap_get_name(rados_ioctx_t io, rados_snap_t id, char* name, int maxlen);
1294 
/**
 * Look up when a pool snapshot occurred.
 *
 * @param io the pool the snapshot was taken in
 * @param id the snapshot to look up
 * @param t output: where the result is stored
 * @returns 0 on success, negative error code on failure
 */
int rados_ioctx_snap_get_stamp(
    rados_ioctx_t io,
    rados_snap_t id,
    time_t* t);
1304 
1305 /** @} Snapshots */
1306 
1307 /**
1308  * @name Synchronous I/O
1309  * Writes are replicated to a number of OSDs based on the
1310  * configuration of the pool they are in. These write functions block
1311  * until data is in memory on all replicas of the object they're
1312  * writing to - they are equivalent to doing the corresponding
 * asynchronous write, and then calling
 * rados_aio_wait_for_complete().  For greater data safety, use the
1315  * asynchronous functions and rados_aio_wait_for_safe().
1316  *
1317  * @{
1318  */
1319 
/**
 * Report the version of the last object read or written to.
 *
 * Exposes the internal version number of the last object that was read
 * or written via this io context.
 *
 * @param io the io context to check
 * @returns last read or written object version
 */
ulong rados_get_last_version(rados_ioctx_t io);
1330 
/**
 * Write *len* bytes from *buf* into the *oid* object, beginning at
 * offset *off*. *len* must be <= UINT_MAX/2.
 *
 * @note This will never return a positive value not equal to len.
 * @param io the io context in which the write will occur
 * @param oid name of the object
 * @param buf data to write
 * @param len length of the data, in bytes
 * @param off byte offset in the object to begin writing at
 * @returns 0 on success, negative error code on failure
 */
int rados_write(rados_ioctx_t io, const(char)* oid, const(char)* buf, size_t len, ulong off);
1349 
/**
 * Write *len* bytes from *buf* into the *oid* object. *len* must be
 * <= UINT_MAX/2.
 *
 * The object is filled with the provided data. An object that already
 * exists is atomically truncated and then written.
 *
 * @param io the io context in which the write will occur
 * @param oid name of the object
 * @param buf data to write
 * @param len length of the data, in bytes
 * @returns 0 on success, negative error code on failure
 */
int rados_write_full(rados_ioctx_t io, const(char)* oid, const(char)* buf, size_t len);
1368 
/**
 * Write the same *data_len* bytes from *buf* repeatedly into the *oid*
 * object. In total *write_len* bytes are written, and *write_len* must
 * be a multiple of *data_len*. Both *write_len* and *data_len* must be
 * <= UINT_MAX/2.
 *
 * @param io the io context in which the write will occur
 * @param oid name of the object
 * @param buf data to write
 * @param data_len length of the data, in bytes
 * @param write_len the total number of bytes to write
 * @param off byte offset in the object to begin writing at
 * @returns 0 on success, negative error code on failure
 */
int rados_writesame(
    rados_ioctx_t io, const(char)* oid,
    const(char)* buf, size_t data_len,
    size_t write_len, ulong off);
1390 
/**
 * Efficiently copy part of one object into another.
 *
 * This is a copy-on-write clone when the underlying filesystem on the
 * OSD supports it.
 *
 * The source and destination objects must be in the same pg. To make
 * sure of that, set a locator key on the io context (see
 * rados_ioctx_locator_set_key()).
 *
 * @param io the context in which the data is cloned
 * @param dst the name of the destination object
 * @param dst_off the offset within the destination object (in bytes)
 * @param src the name of the source object
 * @param src_off the offset within the source object (in bytes)
 * @param len how much data to copy
 * @returns 0 on success, negative error code on failure
 */
int rados_clone_range(
    rados_ioctx_t io,
    const(char)* dst, ulong dst_off,
    const(char)* src, ulong src_off,
    size_t len);
1416 
/**
 * Append *len* bytes from *buf* to the *oid* object. *len* must be
 * <= UINT_MAX/2.
 *
 * @param io the context to operate in
 * @param oid the name of the object
 * @param buf the data to append
 * @param len length of buf (in bytes)
 * @returns 0 on success, negative error code on failure
 */
int rados_append(rados_ioctx_t io, const(char)* oid, const(char)* buf, size_t len);
1432 
/**
 * Read data from an object.
 *
 * If a snapshot was selected with rados_ioctx_snap_set_read(), the io
 * context determines the snapshot that is read from.
 *
 * @param io the context in which to perform the read
 * @param oid the name of the object to read from
 * @param buf where to store the results
 * @param len the number of bytes to read
 * @param off the offset to start reading from in the object
 * @returns number of bytes read on success, negative error code on
 * failure
 */
int rados_read(rados_ioctx_t io, const(char)* oid, char* buf, size_t len, ulong off);
1453 
/**
 * Delete an object.
 *
 * @note Snapshots of the object are not deleted by this call.
 *
 * @param io the pool to delete the object from
 * @param oid the name of the object to delete
 * @returns 0 on success, negative error code on failure
 */
int rados_remove(
    rados_ioctx_t io,
    const(char)* oid);
1464 
/**
 * Resize an object.
 *
 * Growing the object logically fills the new area with zeroes;
 * shrinking it removes the excess data.
 *
 * @param io the context in which to truncate
 * @param oid the name of the object
 * @param size the new size of the object in bytes
 * @returns 0 on success, negative error code on failure
 */
int rados_trunc(
    rados_ioctx_t io,
    const(char)* oid,
    ulong size);
1477 
1478 /**
1479  * @name Xattrs
1480  * Extended attributes are stored as extended attributes on the files
1481  * representing an object on the OSDs. Thus, they have the same
1482  * limitations as the underlying filesystem. On ext4, this means that
1483  * the total data stored in xattrs cannot exceed 4KB.
1484  *
1485  * @{
1486  */
1487 
/**
 * Read the value of an extended attribute of an object.
 *
 * @param io the context in which the attribute is read
 * @param o name of the object
 * @param name which extended attribute to read
 * @param buf where to store the result
 * @param len size of buf in bytes
 * @returns length of xattr value on success, negative error code on
 * failure
 */
int rados_getxattr(rados_ioctx_t io, const(char)* o, const(char)* name, char* buf, size_t len);
1504 
/**
 * Store an extended attribute on an object.
 *
 * @param io the context in which xattr is set
 * @param o name of the object
 * @param name which extended attribute to set
 * @param buf what to store in the xattr
 * @param len the number of bytes in buf
 * @returns 0 on success, negative error code on failure
 */
int rados_setxattr(
    rados_ioctx_t io, const(char)* o,
    const(char)* name, const(char)* buf, size_t len);
1521 
/**
 * Remove an extended attribute from an object.
 *
 * @param io the context in which to delete the xattr
 * @param o the name of the object
 * @param name which xattr to delete
 * @returns 0 on success, negative error code on failure
 */
int rados_rmxattr(
    rados_ioctx_t io,
    const(char)* o,
    const(char)* name);
1531 
/**
 * Begin iterating over the xattrs of an object.
 *
 * @post iter is a valid iterator
 *
 * @param io the context in which to list xattrs
 * @param oid name of the object
 * @param iter output: where the iterator is stored
 * @returns 0 on success, negative error code on failure
 */
int rados_getxattrs(rados_ioctx_t io, const(char)* oid, rados_xattrs_iter_t* iter);
1546 
/**
 * Advance to the next xattr of the object.
 *
 * @pre iter is a valid iterator
 *
 * @post name is the NULL-terminated name of the next xattr, and val
 * holds its value, which has length len. Once the end of the list has
 * been reached, name and val are NULL, and len is 0.
 *
 * NOTE(review): name and val are documented as outputs, yet they are
 * typed const(char*)* (pointer to a const pointer) — confirm whether
 * const(char)** was intended.
 *
 * @param iter iterator to advance
 * @param name where to store the name of the next xattr
 * @param val where to store the value of the next xattr
 * @param len the number of bytes in val
 * @returns 0 on success, negative error code on failure
 */
int rados_getxattrs_next(
    rados_xattrs_iter_t iter, const(char*)* name,
    const(char*)* val, size_t* len);
1567 
/**
 * Release an xattr iterator.
 *
 * iter must not be used once this has been called.
 *
 * @param iter the iterator to close
 */
void rados_getxattrs_end(rados_xattrs_iter_t iter);
1576 
1577 /** @} Xattrs */
1578 
/**
 * Advance to the next omap key/value pair of the object.
 *
 * @pre iter is a valid iterator
 *
 * @post key and val are the next key/value pair. key is
 * null-terminated, and val has length len. Once the end of the list
 * has been reached, key and val are NULL, and len is 0. key and val
 * are no longer accessible after rados_omap_get_end() is called on
 * iter, so copy them if they are needed beyond that point.
 *
 * @param iter iterator to advance
 * @param key where to store the key of the next omap entry
 * @param val where to store the value of the next omap entry
 * @param len where to store the number of bytes in val
 * @returns 0 on success, negative error code on failure
 */
int rados_omap_get_next(rados_omap_iter_t iter, char** key, char** val, size_t* len);
1601 
/**
 * Release an omap iterator.
 *
 * iter must not be used once this has been called.
 *
 * @param iter the iterator to close
 */
void rados_omap_get_end(rados_omap_iter_t iter);
1610 
/**
 * Fetch object stats (size/mtime).
 *
 * TODO: when are these set, and by whom? can they be out of date?
 *
 * @param io ioctx
 * @param o object name
 * @param psize output: where the object size is stored
 * @param pmtime output: where the modification time is stored
 * @returns 0 on success, negative error code on failure
 */
int rados_stat(
    rados_ioctx_t io,
    const(char)* o,
    ulong* psize,
    time_t* pmtime);
1623 
/**
 * Update a tmap (trivial map).
 *
 * Applies a compound update to a tmap object, inserting or deleting
 * some number of records. cmdbuf is a series of operation byte codes,
 * each followed by its command payload. Every command starts with a
 * single-byte command code whose value is one of CEPH_OSD_TMAP_*.
 *
 *  - update tmap 'header'
 *    - 1 byte  = CEPH_OSD_TMAP_HDR
 *    - 4 bytes = data length (little endian)
 *    - N bytes = data
 *
 *  - insert/update one key/value pair
 *    - 1 byte  = CEPH_OSD_TMAP_SET
 *    - 4 bytes = key name length (little endian)
 *    - N bytes = key name
 *    - 4 bytes = data length (little endian)
 *    - M bytes = data
 *
 *  - insert one key/value pair; return -EEXIST if it already exists.
 *    - 1 byte  = CEPH_OSD_TMAP_CREATE
 *    - 4 bytes = key name length (little endian)
 *    - N bytes = key name
 *    - 4 bytes = data length (little endian)
 *    - M bytes = data
 *
 *  - remove one key/value pair
 *    - 1 byte  = CEPH_OSD_TMAP_RM
 *    - 4 bytes = key name length (little endian)
 *    - N bytes = key name
 *
 * Restrictions:
 *  - The HDR update must precede any key/value updates.
 *  - All key/value updates must appear in cmdbuf in lexicographically
 *    sorted order.
 *  - A tmap object can be read/written via the regular APIs, but take
 *    care not to corrupt it. Also be aware that the object format may
 *    change without notice.
 *
 * @param io ioctx
 * @param o object name
 * @param cmdbuf command buffer
 * @param cmdbuflen command buffer length in bytes
 * @returns 0 on success, negative error code on failure
 */
int rados_tmap_update(rados_ioctx_t io, const(char)* o, const(char)* cmdbuf, size_t cmdbuflen);
1675 
/**
 * Store a complete tmap (trivial map) object.
 *
 * Puts a full tmap object into the store, replacing whatever was
 * there before.
 *
 * Layout of buf:
 * - 4 bytes - length of header (little endian)
 * - N bytes - header data
 * - 4 bytes - number of keys (little endian)
 *
 * followed, for each key, by
 * - 4 bytes - key name length (little endian)
 * - N bytes - key name
 * - 4 bytes - value length (little endian)
 * - M bytes - value data
 *
 * @param io ioctx
 * @param o object name
 * @param buf buffer
 * @param buflen buffer length in bytes
 * @returns 0 on success, negative error code on failure
 */
int rados_tmap_put(rados_ioctx_t io, const(char)* o, const(char)* buf, size_t buflen);
1703 
/**
 * Fetch a complete tmap (trivial map) object.
 *
 * Reads a full tmap object. The format of the returned data is
 * described at rados_tmap_put().
 *
 * @param io ioctx
 * @param o object name
 * @param buf buffer
 * @param buflen buffer length in bytes
 * @returns 0 on success, negative error code on failure
 * @returns -ERANGE if buf isn't big enough
 */
int rados_tmap_get(
    rados_ioctx_t io,
    const(char)* o,
    char* buf,
    size_t buflen);
1718 
/**
 * Invoke an OSD class method on an object.
 *
 * The OSD offers a plugin mechanism for performing complicated
 * operations on an object atomically. These plugins are called
 * classes, and this function lets librados users call their custom
 * methods. Input and output formats are defined by the class.
 * Classes in ceph.git can be found in src/cls subdirectories.
 *
 * @param io the context in which to call the method
 * @param oid the object to call the method on
 * @param cls the name of the class
 * @param method the name of the method
 * @param in_buf where to find input
 * @param in_len length of in_buf in bytes
 * @param buf where to store output
 * @param out_len length of buf in bytes
 * @returns the length of the output, or -ERANGE if buf does not have
 * enough space to store it (for methods that return data). For
 * methods that don't return data, the return value is
 * method-specific.
 */
int rados_exec(
    rados_ioctx_t io, const(char)* oid,
    const(char)* cls, const(char)* method,
    const(char)* in_buf, size_t in_len,
    char* buf, size_t out_len);
1750 
1751 /** @} Synchronous I/O */
1752 
1753 /**
1754  * @name Asynchronous I/O
1755  * Read and write to objects without blocking.
1756  *
1757  * @{
1758  */
1759 
/**
 * @typedef rados_completion_t
 * Opaque handle for the state of an asynchronous operation. It holds
 * the return value once the operation completes, and can be used to
 * block until the operation is complete or safe.
 */
alias rados_completion_t = void*;
1767 
/**
 * @typedef rados_callback_t
 * Callback type for asynchronous operations. A callback receives two
 * parameters:
 * - cb the completion that has finished
 * - arg application defined data made available to the callback function
 */
alias rados_callback_t = void function(rados_completion_t cb, void* arg);
1775 
/**
 * Build a completion for use with asynchronous operations.
 *
 * The complete callback corresponds to the operation being acked, the
 * safe callback to it being committed. Callbacks are invoked in order
 * of receipt, so the safe callback may fire before the complete
 * callback, and vice versa. Journalling on the OSDs affects this.
 *
 * TODO: more complete documentation of this elsewhere (in the RADOS docs?)
 *
 * @note Read operations only get a complete callback.
 * @note BUG: this should check for ENOMEM instead of throwing an exception
 *
 * @param cb_arg application-defined data passed to the callback functions
 * @param cb_complete the function to be called when the operation is
 * in memory on all replicas
 * @param cb_safe the function to be called when the operation is on
 * stable storage on all replicas
 * @param pc output: where the completion is stored
 * @returns 0
 */
int rados_aio_create_completion(
    void* cb_arg,
    rados_callback_t cb_complete, rados_callback_t cb_safe,
    rados_completion_t* pc);
1803 
/**
 * Block until an operation has completed.
 *
 * Completed means the operation is in memory on all replicas.
 *
 * @note BUG: this should be void
 *
 * @param c operation to wait for
 * @returns 0
 */
int rados_aio_wait_for_complete(rados_completion_t c);
1815 
/**
 * Block until an operation has become safe.
 *
 * Safe means the operation is on stable storage on all replicas.
 *
 * @note BUG: this should be void
 *
 * @param c operation to wait for
 * @returns 0
 */
int rados_aio_wait_for_safe(rados_completion_t c);
1827 
/**
 * Query whether an asynchronous operation has completed.
 *
 * @warning A positive answer does not imply that the complete callback
 * has finished
 *
 * @param c async operation to inspect
 * @returns whether c is complete
 */
int rados_aio_is_complete(rados_completion_t c);
1838 
1839 /**
1840  * Is an asynchronous operation safe?
1841  *
1842  * @warning This does not imply that the safe callback has
1843  * finished
1844  *
1845  * @param c async operation to inspect
1846  * @returns whether c is safe
1847  */
1848 int rados_aio_is_safe (rados_completion_t c);
1849 
1850 /**
1851  * Block until an operation completes and callback completes
1852  *
1853  * This means it is in memory on all replicas and can be read.
1854  *
1855  * @note BUG: this should be void
1856  *
1857  * @param c operation to wait for
1858  * @returns 0
1859  */
1860 int rados_aio_wait_for_complete_and_cb (rados_completion_t c);
1861 
1862 /**
1863  * Block until an operation is safe and callback has completed
1864  *
1865  * This means it is on stable storage on all replicas.
1866  *
1867  * @note BUG: this should be void
1868  *
1869  * @param c operation to wait for
1870  * @returns 0
1871  */
1872 int rados_aio_wait_for_safe_and_cb (rados_completion_t c);
1873 
1874 /**
1875  * Has an asynchronous operation and callback completed
1876  *
1877  * @param c async operation to inspect
1878  * @returns whether c is complete
1879  */
1880 int rados_aio_is_complete_and_cb (rados_completion_t c);
1881 
1882 /**
1883  * Is an asynchronous operation safe and has the callback completed
1884  *
1885  * @param c async operation to inspect
1886  * @returns whether c is safe
1887  */
1888 int rados_aio_is_safe_and_cb (rados_completion_t c);
1889 
1890 /**
1891  * Get the return value of an asychronous operation
1892  *
1893  * The return value is set when the operation is complete or safe,
1894  * whichever comes first.
1895  *
1896  * @pre The operation is safe or complete
1897  *
1898  * @note BUG: complete callback may never be called when the safe
1899  * message is received before the complete message
1900  *
1901  * @param c async operation to inspect
1902  * @returns return value of the operation
1903  */
1904 int rados_aio_get_return_value (rados_completion_t c);
1905 
1906 /**
1907  * Get the internal object version of the target of an asychronous operation
1908  *
1909  * The return value is set when the operation is complete or safe,
1910  * whichever comes first.
1911  *
1912  * @pre The operation is safe or complete
1913  *
1914  * @note BUG: complete callback may never be called when the safe
1915  * message is received before the complete message
1916  *
1917  * @param c async operation to inspect
1918  * @returns version number of the asychronous operation's target
1919  */
1920 ulong rados_aio_get_version (rados_completion_t c);
1921 
1922 /**
1923  * Release a completion
1924  *
1925  * Call this when you no longer need the completion. It may not be
1926  * freed immediately if the operation is not acked and committed.
1927  *
1928  * @param c completion to release
1929  */
1930 void rados_aio_release (rados_completion_t c);
1931 
1932 /**
1933  * Write data to an object asynchronously
1934  *
1935  * Queues the write and returns. The return value of the completion
1936  * will be 0 on success, negative error code on failure.
1937  *
1938  * @param io the context in which the write will occur
1939  * @param oid name of the object
1940  * @param completion what to do when the write is safe and complete
1941  * @param buf data to write
1942  * @param len length of the data, in bytes
1943  * @param off byte offset in the object to begin writing at
1944  * @returns 0 on success, -EROFS if the io context specifies a snap_seq
1945  * other than LIBRADOS_SNAP_HEAD
1946  */
1947 int rados_aio_write (
1948     rados_ioctx_t io,
1949     const(char)* oid,
1950     rados_completion_t completion,
1951     const(char)* buf,
1952     size_t len,
1953     ulong off);
1954 
1955 /**
1956  * Asychronously append data to an object
1957  *
1958  * Queues the append and returns.
1959  *
1960  * The return value of the completion will be 0 on success, negative
1961  * error code on failure.
1962  *
1963  * @param io the context to operate in
1964  * @param oid the name of the object
1965  * @param completion what to do when the append is safe and complete
1966  * @param buf the data to append
1967  * @param len length of buf (in bytes)
1968  * @returns 0 on success, -EROFS if the io context specifies a snap_seq
1969  * other than LIBRADOS_SNAP_HEAD
1970  */
1971 int rados_aio_append (
1972     rados_ioctx_t io,
1973     const(char)* oid,
1974     rados_completion_t completion,
1975     const(char)* buf,
1976     size_t len);
1977 
1978 /**
1979  * Asychronously write an entire object
1980  *
1981  * The object is filled with the provided data. If the object exists,
1982  * it is atomically truncated and then written.
1983  * Queues the write_full and returns.
1984  *
1985  * The return value of the completion will be 0 on success, negative
1986  * error code on failure.
1987  *
1988  * @param io the io context in which the write will occur
1989  * @param oid name of the object
1990  * @param completion what to do when the write_full is safe and complete
1991  * @param buf data to write
1992  * @param len length of the data, in bytes
1993  * @returns 0 on success, -EROFS if the io context specifies a snap_seq
1994  * other than LIBRADOS_SNAP_HEAD
1995  */
1996 int rados_aio_write_full (
1997     rados_ioctx_t io,
1998     const(char)* oid,
1999     rados_completion_t completion,
2000     const(char)* buf,
2001     size_t len);
2002 
2003 /**
2004  * Asychronously write the same buffer multiple times
2005  *
2006  * Queues the writesame and returns.
2007  *
2008  * The return value of the completion will be 0 on success, negative
2009  * error code on failure.
2010  *
2011  * @param io the io context in which the write will occur
2012  * @param oid name of the object
2013  * @param completion what to do when the writesame is safe and complete
2014  * @param buf data to write
2015  * @param data_len length of the data, in bytes
2016  * @param write_len the total number of bytes to write
2017  * @param off byte offset in the object to begin writing at
2018  * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2019  * other than LIBRADOS_SNAP_HEAD
2020  */
2021 int rados_aio_writesame (
2022     rados_ioctx_t io,
2023     const(char)* oid,
2024     rados_completion_t completion,
2025     const(char)* buf,
2026     size_t data_len,
2027     size_t write_len,
2028     ulong off);
2029 
2030 /**
2031  * Asychronously remove an object
2032  *
2033  * Queues the remove and returns.
2034  *
2035  * The return value of the completion will be 0 on success, negative
2036  * error code on failure.
2037  *
2038  * @param io the context to operate in
2039  * @param oid the name of the object
2040  * @param completion what to do when the remove is safe and complete
2041  * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2042  * other than LIBRADOS_SNAP_HEAD
2043  */
2044 int rados_aio_remove (
2045     rados_ioctx_t io,
2046     const(char)* oid,
2047     rados_completion_t completion);
2048 
2049 /**
2050  * Asychronously read data from an object
2051  *
2052  * The io context determines the snapshot to read from, if any was set
2053  * by rados_ioctx_snap_set_read().
2054  *
2055  * The return value of the completion will be number of bytes read on
2056  * success, negative error code on failure.
2057  *
2058  * @note only the 'complete' callback of the completion will be called.
2059  *
2060  * @param io the context in which to perform the read
2061  * @param oid the name of the object to read from
2062  * @param completion what to do when the read is complete
2063  * @param buf where to store the results
2064  * @param len the number of bytes to read
2065  * @param off the offset to start reading from in the object
2066  * @returns 0 on success, negative error code on failure
2067  */
2068 int rados_aio_read (
2069     rados_ioctx_t io,
2070     const(char)* oid,
2071     rados_completion_t completion,
2072     char* buf,
2073     size_t len,
2074     ulong off);
2075 
2076 /**
2077  * Block until all pending writes in an io context are safe
2078  *
2079  * This is not equivalent to calling rados_aio_wait_for_safe() on all
2080  * write completions, since this waits for the associated callbacks to
2081  * complete as well.
2082  *
2083  * @note BUG: always returns 0, should be void or accept a timeout
2084  *
2085  * @param io the context to flush
2086  * @returns 0 on success, negative error code on failure
2087  */
2088 int rados_aio_flush (rados_ioctx_t io);
2089 
2090 /**
2091  * Schedule a callback for when all currently pending
2092  * aio writes are safe. This is a non-blocking version of
2093  * rados_aio_flush().
2094  *
2095  * @param io the context to flush
2096  * @param completion what to do when the writes are safe
2097  * @returns 0 on success, negative error code on failure
2098  */
2099 int rados_aio_flush_async (rados_ioctx_t io, rados_completion_t completion);
2100 
2101 /**
2102  * Asynchronously get object stats (size/mtime)
2103  *
2104  * @param io ioctx
2105  * @param o object name
2106  * @param psize where to store object size
2107  * @param pmtime where to store modification time
2108  * @returns 0 on success, negative error code on failure
2109  */
2110 int rados_aio_stat (
2111     rados_ioctx_t io,
2112     const(char)* o,
2113     rados_completion_t completion,
2114     ulong* psize,
2115     time_t* pmtime);
2116 
2117 /**
2118  * Cancel async operation
2119  *
2120  * @param io ioctx
2121  * @param completion completion handle
2122  * @returns 0 on success, negative error code on failure
2123  */
2124 int rados_aio_cancel (rados_ioctx_t io, rados_completion_t completion);
2125 
2126 /** @} Asynchronous I/O */
2127 
2128 /**
2129  * @name Watch/Notify
2130  *
2131  * Watch/notify is a protocol to help communicate among clients. It
 * can be used to synchronize client state. All that's needed is a
2133  * well-known object name (for example, rbd uses the header object of
2134  * an image).
2135  *
2136  * Watchers register an interest in an object, and receive all
2137  * notifies on that object. A notify attempts to communicate with all
2138  * clients watching an object, and blocks on the notifier until each
2139  * client responds or a timeout is reached.
2140  *
2141  * See rados_watch() and rados_notify() for more details.
2142  *
2143  * @{
2144  */
2145 
2146 /**
2147  * @typedef rados_watchcb_t
2148  *
2149  * Callback activated when a notify is received on a watched
2150  * object.
2151  *
2152  * @param opcode undefined
2153  * @param ver version of the watched object
2154  * @param arg application-specific data
2155  *
2156  * @note BUG: opcode is an internal detail that shouldn't be exposed
2157  * @note BUG: ver is unused
2158  */
2159 alias void function (ubyte opcode, ulong ver, void* arg) rados_watchcb_t;
2160 
2161 /**
2162  * @typedef rados_watchcb2_t
2163  *
2164  * Callback activated when a notify is received on a watched
2165  * object.
2166  *
2167  * @param arg opaque user-defined value provided to rados_watch2()
2168  * @param notify_id an id for this notify event
2169  * @param handle the watcher handle we are notifying
2170  * @param notifier_id the unique client id for the notifier
2171  * @param data payload from the notifier
2172  * @param datalen length of payload buffer
2173  */
2174 alias void function (void* arg, ulong notify_id, ulong handle, ulong notifier_id, void* data, size_t data_len) rados_watchcb2_t;
2175 
2176 /**
2177  * @typedef rados_watcherrcb_t
2178  *
2179  * Callback activated when we encounter an error with the watch session.
2180  * This can happen when the location of the objects moves within the
2181  * cluster and we fail to register our watch with the new object location,
2182  * or when our connection with the object OSD is otherwise interrupted and
2183  * we may have missed notify events.
2184  *
2185  * @param pre opaque user-defined value provided to rados_watch2()
2186  * @param err error code
2187  */
2188 alias void function (void* pre, ulong cookie, int err) rados_watcherrcb_t;
2189 
2190 /**
2191  * Register an interest in an object
2192  *
2193  * A watch operation registers the client as being interested in
2194  * notifications on an object. OSDs keep track of watches on
2195  * persistent storage, so they are preserved across cluster changes by
2196  * the normal recovery process. If the client loses its connection to
2197  * the primary OSD for a watched object, the watch will be removed
2198  * after 30 seconds. Watches are automatically reestablished when a new
2199  * connection is made, or a placement group switches OSDs.
2200  *
2201  * @note BUG: watch timeout should be configurable
2202  * @note BUG: librados should provide a way for watchers to notice connection resets
2203  * @note BUG: the ver parameter does not work, and -ERANGE will never be returned
2204  *            (See URL tracker.ceph.com/issues/2592)
2205  *
2206  * @param io the pool the object is in
2207  * @param o the object to watch
2208  * @param ver expected version of the object
2209  * @param cookie where to store the internal id assigned to this watch
2210  * @param watchcb what to do when a notify is received on this object
2211  * @param arg application defined data to pass when watchcb is called
2212  * @returns 0 on success, negative error code on failure
2213  * @returns -ERANGE if the version of the object is greater than ver
2214  */
2215 int rados_watch (
2216     rados_ioctx_t io,
2217     const(char)* o,
2218     ulong ver,
2219     ulong* cookie,
2220     rados_watchcb_t watchcb,
2221     void* arg);
2222 
2223 /**
2224  * Register an interest in an object
2225  *
2226  * A watch operation registers the client as being interested in
2227  * notifications on an object. OSDs keep track of watches on
2228  * persistent storage, so they are preserved across cluster changes by
2229  * the normal recovery process. If the client loses its connection to
2230  * the primary OSD for a watched object, the watch will be removed
2231  * after 30 seconds. Watches are automatically reestablished when a new
2232  * connection is made, or a placement group switches OSDs.
2233  *
2234  * @note BUG: watch timeout should be configurable
2235  *
2236  * @param io the pool the object is in
2237  * @param o the object to watch
2238  * @param cookie where to store the internal id assigned to this watch
2239  * @param watchcb what to do when a notify is received on this object
2240  * @param watcherrcb what to do when the watch session encounters an error
2241  * @param arg opaque value to pass to the callback
2242  * @returns 0 on success, negative error code on failure
2243  */
2244 int rados_watch2 (
2245     rados_ioctx_t io,
2246     const(char)* o,
2247     ulong* cookie,
2248     rados_watchcb2_t watchcb,
2249     rados_watcherrcb_t watcherrcb,
2250     void* arg);
2251 
2252 /**
2253  * Asynchronous register an interest in an object
2254  *
2255  * A watch operation registers the client as being interested in
2256  * notifications on an object. OSDs keep track of watches on
2257  * persistent storage, so they are preserved across cluster changes by
2258  * the normal recovery process. If the client loses its connection to
2259  * the primary OSD for a watched object, the watch will be removed
2260  * after 30 seconds. Watches are automatically reestablished when a new
2261  * connection is made, or a placement group switches OSDs.
2262  *
2263  * @note BUG: watch timeout should be configurable
2264  *
2265  * @param io the pool the object is in
2266  * @param o the object to watch
2267  * @param completion what to do when operation has been attempted
2268  * @param handle where to store the internal id assigned to this watch
2269  * @param watchcb what to do when a notify is received on this object
2270  * @param watcherrcb what to do when the watch session encounters an error
2271  * @param arg opaque value to pass to the callback
2272  * @returns 0 on success, negative error code on failure
2273  */
2274 int rados_aio_watch (
2275     rados_ioctx_t io,
2276     const(char)* o,
2277     rados_completion_t completion,
2278     ulong* handle,
2279     rados_watchcb2_t watchcb,
2280     rados_watcherrcb_t watcherrcb,
2281     void* arg);
2282 
2283 /**
2284  * Check on the status of a watch
2285  *
2286  * Return the number of milliseconds since the watch was last confirmed.
2287  * Or, if there has been an error, return that.
2288  *
2289  * If there is an error, the watch is no longer valid, and should be
2290  * destroyed with rados_unwatch2().  The the user is still interested
2291  * in the object, a new watch should be created with rados_watch2().
2292  *
2293  * @param io the pool the object is in
2294  * @param cookie the watch handle
2295  * @returns ms since last confirmed on success, negative error code on failure
2296  */
2297 int rados_watch_check (rados_ioctx_t io, ulong cookie);
2298 
2299 /**
2300  * Unregister an interest in an object
2301  *
2302  * Once this completes, no more notifies will be sent to us for this
2303  * watch. This should be called to clean up unneeded watchers.
2304  *
2305  * @param io the pool the object is in
2306  * @param o the name of the watched object (ignored)
2307  * @param cookie which watch to unregister
2308  * @returns 0 on success, negative error code on failure
2309  */
2310 int rados_unwatch (rados_ioctx_t io, const(char)* o, ulong cookie);
2311 
2312 /**
2313  * Unregister an interest in an object
2314  *
2315  * Once this completes, no more notifies will be sent to us for this
2316  * watch. This should be called to clean up unneeded watchers.
2317  *
2318  * @param io the pool the object is in
2319  * @param cookie which watch to unregister
2320  * @returns 0 on success, negative error code on failure
2321  */
2322 int rados_unwatch2 (rados_ioctx_t io, ulong cookie);
2323 
2324 /**
2325  * Asynchronous unregister an interest in an object
2326  *
2327  * Once this completes, no more notifies will be sent to us for this
2328  * watch. This should be called to clean up unneeded watchers.
2329  *
2330  * @param io the pool the object is in
2331  * @param completion what to do when operation has been attempted
2332  * @param cookie which watch to unregister
2333  * @returns 0 on success, negative error code on failure
2334  */
2335 int rados_aio_unwatch (
2336     rados_ioctx_t io,
2337     ulong cookie,
2338     rados_completion_t completion);
2339 
2340 /**
2341  * Sychronously notify watchers of an object
2342  *
2343  * This blocks until all watchers of the object have received and
2344  * reacted to the notify, or a timeout is reached.
2345  *
2346  * @note BUG: the timeout is not changeable via the C API
2347  * @note BUG: the bufferlist is inaccessible in a rados_watchcb_t
2348  *
2349  * @param io the pool the object is in
2350  * @param o the name of the object
2351  * @param ver obsolete - just pass zero
2352  * @param buf data to send to watchers
2353  * @param buf_len length of buf in bytes
2354  * @returns 0 on success, negative error code on failure
2355  */
2356 int rados_notify (
2357     rados_ioctx_t io,
2358     const(char)* o,
2359     ulong ver,
2360     const(char)* buf,
2361     int buf_len);
2362 
2363 /**
2364  * Sychronously notify watchers of an object
2365  *
2366  * This blocks until all watchers of the object have received and
2367  * reacted to the notify, or a timeout is reached.
2368  *
2369  * The reply buffer is optional.  If specified, the client will get
2370  * back an encoded buffer that includes the ids of the clients that
2371  * acknowledged the notify as well as their notify ack payloads (if
2372  * any).  Clients that timed out are not included.  Even clients that
2373  * do not include a notify ack payload are included in the list but
2374  * have a 0-length payload associated with them.  The format:
2375  *
2376  *    le32 num_acks
2377  *    {
2378  *      le64 gid     global id for the client (for client.1234 that's 1234)
2379  *      le64 cookie  cookie for the client
2380  *      le32 buflen  length of reply message buffer
2381  *      u8 * buflen  payload
2382  *    } * num_acks
2383  *    le32 num_timeouts
2384  *    {
2385  *      le64 gid     global id for the client
2386  *      le64 cookie  cookie for the client
2387  *    } * num_timeouts
2388  *
2389  * Note: There may be multiple instances of the same gid if there are
2390  * multiple watchers registered via the same client.
2391  *
2392  * Note: The buffer must be released with rados_buffer_free() when the
2393  * user is done with it.
2394  *
2395  * Note: Since the result buffer includes clients that time out, it
2396  * will be set even when rados_notify() returns an error code (like
2397  * -ETIMEDOUT).
2398  *
2399  * @param io the pool the object is in
2400  * @param completion what to do when operation has been attempted
2401  * @param o the name of the object
2402  * @param buf data to send to watchers
2403  * @param buf_len length of buf in bytes
2404  * @param timeout_ms notify timeout (in ms)
2405  * @param reply_buffer pointer to reply buffer pointer (free with rados_buffer_free)
2406  * @param reply_buffer_len pointer to size of reply buffer
2407  * @returns 0 on success, negative error code on failure
2408  */
2409 int rados_notify2 (
2410     rados_ioctx_t io,
2411     const(char)* o,
2412     const(char)* buf,
2413     int buf_len,
2414     ulong timeout_ms,
2415     char** reply_buffer,
2416     size_t* reply_buffer_len);
/**
 * Asynchronous counterpart of rados_notify2()
 *
 * Takes the same arguments as rados_notify2() plus a completion. See
 * the rados_notify2() comment for the reply buffer format and for how
 * the buffer must be released.
 *
 * @param io the pool the object is in
 * @param o the name of the object
 * @param completion what to do when operation has been attempted
 * @param buf data to send to watchers
 * @param buf_len length of buf in bytes
 * @param timeout_ms notify timeout (in ms)
 * @param reply_buffer pointer to reply buffer pointer (free with rados_buffer_free)
 * @param reply_buffer_len pointer to size of reply buffer
 * @returns 0 on success, negative error code on failure
 */
int rados_aio_notify (
    rados_ioctx_t io,
    const(char)* o,
    rados_completion_t completion,
    const(char)* buf,
    int buf_len,
    ulong timeout_ms,
    char** reply_buffer,
    size_t* reply_buffer_len);
2426 
2427 /**
2428  * Acknolwedge receipt of a notify
2429  *
2430  * @param io the pool the object is in
2431  * @param o the name of the object
2432  * @param notify_id the notify_id we got on the watchcb2_t callback
2433  * @param cookie the watcher handle
2434  * @param buf payload to return to notifier (optional)
2435  * @param buf_len payload length
2436  * @returns 0 on success
2437  */
2438 int rados_notify_ack (
2439     rados_ioctx_t io,
2440     const(char)* o,
2441     ulong notify_id,
2442     ulong cookie,
2443     const(char)* buf,
2444     int buf_len);
2445 
2446 /**
2447  * Flush watch/notify callbacks
2448  *
2449  * This call will block until all pending watch/notify callbacks have
2450  * been executed and the queue is empty.  It should usually be called
2451  * after shutting down any watches before shutting down the ioctx or
2452  * librados to ensure that any callbacks do not misuse the ioctx (for
2453  * example by calling rados_notify_ack after the ioctx has been
2454  * destroyed).
2455  *
2456  * @param cluster the cluster handle
2457  */
2458 int rados_watch_flush (rados_t cluster);
2459 /**
2460  * Flush watch/notify callbacks
2461  *
2462  * This call will be nonblock, and the completion will be called
2463  * until all pending watch/notify callbacks have been executed and
2464  * the queue is empty.  It should usually be called after shutting
2465  * down any watches before shutting down the ioctx or
2466  * librados to ensure that any callbacks do not misuse the ioctx (for
2467  * example by calling rados_notify_ack after the ioctx has been
2468  * destroyed).
2469  *
2470  * @param cluster the cluster handle
2471  * @param completion what to do when operation has been attempted
2472  */
2473 int rados_aio_watch_flush (rados_t cluster, rados_completion_t completion);
2474 
2475 /** @} Watch/Notify */
2476 
2477 /**
2478  * Pin an object in the cache tier
2479  *
2480  * When an object is pinned in the cache tier, it stays in the cache
2481  * tier, and won't be flushed out.
2482  *
2483  * @param io the pool the object is in
2484  * @param o the object id
2485  * @returns 0 on success, negative error code on failure
2486  */
2487 int rados_cache_pin (rados_ioctx_t io, const(char)* o);
2488 
2489 /**
2490  * Unpin an object in the cache tier
2491  *
2492  * After an object is unpinned in the cache tier, it can be flushed out
2493  *
2494  * @param io the pool the object is in
2495  * @param o the object id
2496  * @returns 0 on success, negative error code on failure
2497  */
2498 int rados_cache_unpin (rados_ioctx_t io, const(char)* o);
2499 
2500 /**
2501  * @name Hints
2502  *
2503  * @{
2504  */
2505 
2506 /**
2507  * Set allocation hint for an object
2508  *
2509  * This is an advisory operation, it will always succeed (as if it was
2510  * submitted with a LIBRADOS_OP_FLAG_FAILOK flag set) and is not
2511  * guaranteed to do anything on the backend.
2512  *
2513  * @param io the pool the object is in
2514  * @param o the name of the object
2515  * @param expected_object_size expected size of the object, in bytes
2516  * @param expected_write_size expected size of writes to the object, in bytes
2517  * @returns 0 on success, negative error code on failure
2518  */
2519 int rados_set_alloc_hint (
2520     rados_ioctx_t io,
2521     const(char)* o,
2522     ulong expected_object_size,
2523     ulong expected_write_size);
2524 
2525 /**
2526  * Set allocation hint for an object
2527  *
2528  * This is an advisory operation, it will always succeed (as if it was
2529  * submitted with a LIBRADOS_OP_FLAG_FAILOK flag set) and is not
2530  * guaranteed to do anything on the backend.
2531  *
2532  * @param io the pool the object is in
2533  * @param o the name of the object
2534  * @param expected_object_size expected size of the object, in bytes
2535  * @param expected_write_size expected size of writes to the object, in bytes
2536  * @param flags hints about future IO patterns
2537  * @returns 0 on success, negative error code on failure
2538  */
2539 int rados_set_alloc_hint2 (
2540     rados_ioctx_t io,
2541     const(char)* o,
2542     ulong expected_object_size,
2543     ulong expected_write_size,
2544     uint flags);
2545 
2546 /** @} Hints */
2547 
2548 /**
2549  * @name Object Operations
2550  *
 * A single rados operation can do multiple operations on one object
 * atomically. The whole operation will succeed or fail, and no partial
 * results will be visible.
2554  *
2555  * Operations may be either reads, which can return data, or writes,
2556  * which cannot. The effects of writes are applied and visible all at
2557  * once, so an operation that sets an xattr and then checks its value
2558  * will not see the updated value.
2559  *
2560  * @{
2561  */
2562 
2563 /**
2564  * Create a new rados_write_op_t write operation. This will store all actions
2565  * to be performed atomically. You must call rados_release_write_op when you are
2566  * finished with it.
2567  *
2568  * @returns non-NULL on success, NULL on memory allocation error.
2569  */
2570 rados_write_op_t rados_create_write_op ();
2571 
2572 /**
2573  * Free a rados_write_op_t, must be called when you're done with it.
2574  * @param write_op operation to deallocate, created with rados_create_write_op
2575  */
2576 void rados_release_write_op (rados_write_op_t write_op);
2577 
2578 /**
2579  * Set flags for the last operation added to this write_op.
2580  * At least one op must have been added to the write_op.
2581  * @param flags see librados.h constants beginning with LIBRADOS_OP_FLAG
2582  */
2583 void rados_write_op_set_flags (rados_write_op_t write_op, int flags);
2584 
2585 /**
2586  * Ensure that the object exists before writing
2587  * @param write_op operation to add this action to
2588  */
2589 void rados_write_op_assert_exists (rados_write_op_t write_op);
2590 
2591 /**
2592  * Ensure that the object exists and that its internal version
2593  * number is equal to "ver" before writing. "ver" should be a
2594  * version number previously obtained with rados_get_last_version().
2595  * - If the object's version is greater than the asserted version
2596  *   then rados_write_op_operate will return -ERANGE instead of
2597  *   executing the op.
2598  * - If the object's version is less than the asserted version
2599  *   then rados_write_op_operate will return -EOVERFLOW instead
2600  *   of executing the op.
2601  * @param write_op operation to add this action to
2602  * @param ver object version number
2603  */
2604 void rados_write_op_assert_version (rados_write_op_t write_op, ulong ver);
2605 
2606 /**
2607  * Ensure that given xattr satisfies comparison.
2608  * If the comparison is not satisfied, the return code of the
2609  * operation will be -ECANCELED
2610  * @param write_op operation to add this action to
2611  * @param name name of the xattr to look up
2612  * @param comparison_operator currently undocumented, look for
2613  * LIBRADOS_CMPXATTR_OP_EQ in librados.h
2614  * @param value buffer to compare actual xattr value to
2615  * @param value_len length of buffer to compare actual xattr value to
2616  */
2617 void rados_write_op_cmpxattr (
2618     rados_write_op_t write_op,
2619     const(char)* name,
2620     ubyte comparison_operator,
2621     const(char)* value,
2622     size_t value_len);
2623 
2624 /**
2625  * Ensure that the an omap value satisfies a comparison,
2626  * with the supplied value on the right hand side (i.e.
2627  * for OP_LT, the comparison is actual_value < value.
2628  *
2629  * @param write_op operation to add this action to
2630  * @param key which omap value to compare
2631  * @param comparison_operator one of LIBRADOS_CMPXATTR_OP_EQ,
2632    LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
2633  * @param val value to compare with
2634  * @param val_len length of value in bytes
2635  * @param prval where to store the return value from this action
2636  */
2637 void rados_write_op_omap_cmp (
2638     rados_write_op_t write_op,
2639     const(char)* key,
2640     ubyte comparison_operator,
2641     const(char)* val,
2642     size_t val_len,
2643     int* prval);
2644 
2645 /**
2646  * Set an xattr
2647  * @param write_op operation to add this action to
2648  * @param name name of the xattr
2649  * @param value buffer to set xattr to
2650  * @param value_len length of buffer to set xattr to
2651  */
2652 void rados_write_op_setxattr (
2653     rados_write_op_t write_op,
2654     const(char)* name,
2655     const(char)* value,
2656     size_t value_len);
2657 
2658 /**
2659  * Remove an xattr
2660  * @param write_op operation to add this action to
2661  * @param name name of the xattr to remove
2662  */
2663 void rados_write_op_rmxattr (rados_write_op_t write_op, const(char)* name);
2664 
2665 /**
2666  * Create the object
2667  * @param write_op operation to add this action to
2668  * @param exclusive set to either LIBRADOS_CREATE_EXCLUSIVE or
2669    LIBRADOS_CREATE_IDEMPOTENT
2670  * will error if the object already exists.
2671  * @param category category string (DEPRECATED, HAS NO EFFECT)
2672  */
2673 void rados_write_op_create (
2674     rados_write_op_t write_op,
2675     int exclusive,
2676     const(char)* category);
2677 
2678 /**
2679  * Write to offset
2680  * @param write_op operation to add this action to
2681  * @param offset offset to write to
2682  * @param buffer bytes to write
2683  * @param len length of buffer
2684  */
2685 void rados_write_op_write (
2686     rados_write_op_t write_op,
2687     const(char)* buffer,
2688     size_t len,
2689     ulong offset);
2690 
2691 /**
2692  * Write whole object, atomically replacing it.
2693  * @param write_op operation to add this action to
2694  * @param buffer bytes to write
2695  * @param len length of buffer
2696  */
2697 void rados_write_op_write_full (
2698     rados_write_op_t write_op,
2699     const(char)* buffer,
2700     size_t len);
2701 
2702 /**
2703  * Write the same buffer multiple times
2704  * @param write_op operation to add this action to
2705  * @param buffer bytes to write
2706  * @param data_len length of buffer
2707  * @param write_len total number of bytes to write, as a multiple of @data_len
2708  * @param offset offset to write to
2709  */
2710 void rados_write_op_writesame (
2711     rados_write_op_t write_op,
2712     const(char)* buffer,
2713     size_t data_len,
2714     size_t write_len,
2715     ulong offset);
2716 
2717 /**
2718  * Add an action that appends to the end of the object.
2719  * @param write_op operation to add this action to
2720  * @param buffer bytes to append
2721  * @param len length of buffer
2722  */
2723 void rados_write_op_append (
2724     rados_write_op_t write_op,
2725     const(char)* buffer,
2726     size_t len);
2727 
2728 /**
2729  * Add a "remove object" action to a write operation
2730  * @param write_op operation to add this action to
2731  */
2732 void rados_write_op_remove (rados_write_op_t write_op);
2733 
2734 /**
2735  * Add a "truncate object" action to a write operation
2736  * @param write_op operation to add this action to
2737  * @param offset offset to truncate the object to
2738  */
2739 void rados_write_op_truncate (rados_write_op_t write_op, ulong offset);
2740 
2741 /**
2742  * Add an action that zeroes part of an object
2743  * @param write_op operation to add this action to
2744  * @param offset offset of the region to zero
2745  * @param len length of the region to zero
2746  */
2747 void rados_write_op_zero (rados_write_op_t write_op, ulong offset, ulong len);
2748 
2749 /**
2750  * Add an action that executes an OSD class method on an object.
2751  * See rados_exec() for general description.
2752  *
2753  * @param write_op operation to add this action to
2754  * @param cls the name of the class
2755  * @param method the name of the method
2756  * @param in_buf buffer holding the input for the method
2757  * @param in_len length of in_buf in bytes
2758  * @param prval where to store the return value from the method
2759  */
2760 void rados_write_op_exec (
2761     rados_write_op_t write_op,
2762     const(char)* cls,
2763     const(char)* method,
2764     const(char)* in_buf,
2765     size_t in_len,
2766     int* prval);
2767 
2768 /**
2769  * Add an action that sets key/value pairs on an object
2770  *
2771  * @param write_op operation to add this action to
2772  * @param keys array of null-terminated char arrays representing keys to set
2773  * @param vals array of pointers to values to set
2774  * @param lens array of lengths, one for each value in vals
2775  * @param num number of key/value pairs to set
2776  */
2777 void rados_write_op_omap_set (
2778     rados_write_op_t write_op,
2779     const(char*)* keys,
2780     const(char*)* vals,
2781     const(size_t)* lens,
2782     size_t num);
2783 
2784 /**
2785  * Add an action that removes key/value pairs from an object
2786  *
2787  * @param write_op operation to add this action to
2788  * @param keys array of null-terminated char arrays representing keys to remove
2789  * @param keys_len number of keys in keys (i.e. key/value pairs to remove)
2790  */
2791 void rados_write_op_omap_rm_keys (
2792     rados_write_op_t write_op,
2793     const(char*)* keys,
2794     size_t keys_len);
2795 
2796 /**
2797  * Add an action that removes all key/value pairs from an object
2798  *
2799  * @param write_op operation to add this action to
2800  */
2801 void rados_write_op_omap_clear (rados_write_op_t write_op);
2802 
2803 /**
2804  * Add an action that sets the allocation hint for an object
2805  *
2806  * @param write_op operation to add this action to
2807  * @param expected_object_size expected size of the object, in bytes
2808  * @param expected_write_size expected size of writes to the object, in bytes
2809  */
2810 void rados_write_op_set_alloc_hint (
2811     rados_write_op_t write_op,
2812     ulong expected_object_size,
2813     ulong expected_write_size);
2814 
2815 /**
2816  * Add an action that sets the allocation hint for an object; takes the
      * same arguments as rados_write_op_set_alloc_hint() plus a flags word.
2817  *
2818  * @param write_op operation to add this action to
2819  * @param expected_object_size expected size of the object, in bytes
2820  * @param expected_write_size expected size of writes to the object, in bytes
2821  * @param flags hints about future IO patterns
2822  */
2823 void rados_write_op_set_alloc_hint2 (
2824     rados_write_op_t write_op,
2825     ulong expected_object_size,
2826     ulong expected_write_size,
2827     uint flags);
2828 
2829 /**
2830  * Perform a write operation synchronously (mtime given as a time_t)
2831  * @param write_op operation to perform
2832  * @param io the ioctx that the object is in
2833  * @param oid the object id
2834  * @param mtime the time to set the mtime to, NULL for the current time
2835  * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
2836  */
2837 int rados_write_op_operate (
2838     rados_write_op_t write_op,
2839     rados_ioctx_t io,
2840     const(char)* oid,
2841     time_t* mtime,
2842     int flags);
2843 
2844 /**
2845  * Perform a write operation synchronously; same parameter list as
      * rados_write_op_operate() except that mtime is given as a timespec
2846  * @param write_op operation to perform
2847  * @param io the ioctx that the object is in
2848  * @param oid the object id
2849  * @param mtime the time to set the mtime to, NULL for the current time
2850  * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
2851  */
2852 
2853 int rados_write_op_operate2 (
2854     rados_write_op_t write_op,
2855     rados_ioctx_t io,
2856     const(char)* oid,
2857     timespec* mtime,
2858     int flags);
2859 
2860 /**
2861  * Perform a write operation asynchronously (mtime given as a time_t)
2862  * @param write_op operation to perform
2863  * @param io the ioctx that the object is in
2864  * @param completion what to do when operation has been attempted
2865  * @param oid the object id
2866  * @param mtime the time to set the mtime to, NULL for the current time
2867  * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
2868  */
2869 int rados_aio_write_op_operate (
2870     rados_write_op_t write_op,
2871     rados_ioctx_t io,
2872     rados_completion_t completion,
2873     const(char)* oid,
2874     time_t* mtime,
2875     int flags);
2876 
2877 /**
2878  * Create a new rados_read_op_t read operation. This will store all
2879  * actions to be performed atomically. You must call
2880  * rados_release_read_op when you are finished with it (after it
2881  * completes, or you decide not to send it in the first place).
2882  *
2883  * @returns non-NULL on success, NULL on memory allocation error.
2884  */
2885 rados_read_op_t rados_create_read_op ();
2886 
2887 /**
2888  * Free a rados_read_op_t; must be called when you are done with it.
2889  * @param read_op operation to deallocate, created with rados_create_read_op
2890  */
2891 void rados_release_read_op (rados_read_op_t read_op);
2892 
2893 /**
2894  * Set flags for the last operation added to this read_op.
2895  * At least one op must have been added to the read_op.
      * @param read_op operation whose most recently added op receives the flags
2896  * @param flags see the constants beginning with LIBRADOS_OP_FLAG
2897  */
2898 void rados_read_op_set_flags (rados_read_op_t read_op, int flags);
2899 
2900 /**
2901  * Add an action that ensures the object exists before reading
2902  * @param read_op operation to add this action to
2903  */
2904 void rados_read_op_assert_exists (rados_read_op_t read_op);
2905 
2906 /**
2907  * Add an action that ensures the object exists and that its internal
2908  * version number is equal to "ver" before reading. "ver" should be a
2909  * version number previously obtained with rados_get_last_version().
2910  * - If the object's version is greater than the asserted version
2911  *   then rados_read_op_operate will return -ERANGE instead of
2912  *   executing the op.
2913  * - If the object's version is less than the asserted version
2914  *   then rados_read_op_operate will return -EOVERFLOW instead
2915  *   of executing the op.
2916  * @param read_op operation to add this action to
2917  * @param ver object version number to assert
2918  */
2919 void rados_read_op_assert_version (rados_read_op_t read_op, ulong ver);
2920 
2921 /**
2922  * Ensure that an xattr satisfies a comparison.
2923  * If the comparison is not satisfied, the return code of the
2924  * operation will be -ECANCELED
2925  * @param read_op operation to add this action to
2926  * @param name name of the xattr to look up
2927  * @param comparison_operator currently undocumented, look for
2928  * LIBRADOS_CMPXATTR_OP_EQ in librados.h
2929  * @param value buffer to compare actual xattr value to
2930  * @param value_len length of buffer to compare actual xattr value to
2931  */
2932 void rados_read_op_cmpxattr (
2933     rados_read_op_t read_op,
2934     const(char)* name,
2935     ubyte comparison_operator,
2936     const(char)* value,
2937     size_t value_len);
2938 
2939 /**
2940  * Add an action that starts iterating over the xattrs on an object.
2941  *
2942  * @param read_op operation to add this action to
2943  * @param iter where to store the xattrs iterator
2944  * @param prval where to store the return value of this action
2945  */
2946 void rados_read_op_getxattrs (
2947     rados_read_op_t read_op,
2948     rados_xattrs_iter_t* iter,
2949     int* prval);
2950 
2951 /**
2952  * Ensure that an omap value satisfies a comparison,
2953  * with the supplied value on the right hand side (i.e.
2954  * for OP_LT, the comparison is actual_value < value).
2955  *
2956  * @param read_op operation to add this action to
2957  * @param key which omap value to compare
2958  * @param comparison_operator one of LIBRADOS_CMPXATTR_OP_EQ,
2959  *   LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
2960  * @param val value to compare with
2961  * @param val_len length of value in bytes
2962  * @param prval where to store the return value from this action
2963  */
2964 void rados_read_op_omap_cmp (
2965     rados_read_op_t read_op,
2966     const(char)* key,
2967     ubyte comparison_operator,
2968     const(char)* val,
2969     size_t val_len,
2970     int* prval);
2971 
2972 /**
2973  * Add an action that gets the object size and mtime
2974  * @param read_op operation to add this action to
2975  * @param psize where to store object size
2976  * @param pmtime where to store modification time
2977  * @param prval where to store the return value of this action
2978  */
2979 void rados_read_op_stat (
2980     rados_read_op_t read_op,
2981     ulong* psize,
2982     time_t* pmtime,
2983     int* prval);
2984 
2985 /**
2986  * Read bytes from offset into buffer.
2987  *
2988  * bytes_read will be filled with the number of bytes read if successful.
2989  * A short read can only occur if the read reaches the end of the
2990  * object.
2991  *
2992  * @param read_op operation to add this action to
2993  * @param offset offset to read from
2994  * @param len length of buffer
2995  * @param buffer where to put the data
2996  * @param bytes_read where to store the number of bytes read by this action
2997  * @param prval where to store the return value of this action
2998  */
2999 void rados_read_op_read (
3000     rados_read_op_t read_op,
3001     ulong offset,
3002     size_t len,
3003     char* buffer,
3004     size_t* bytes_read,
3005     int* prval);
3006 
3007 /**
3008  * Add an action that executes an OSD class method on an object.
3009  * See rados_exec() for general description.
3010  *
3011  * The output buffer is allocated on the heap; the caller is
3012  * expected to release that memory with rados_buffer_free(). The
3013  * buffer and length pointers can all be NULL, in which case they are
3014  * not filled in.
3015  *
3016  * @param read_op operation to add this action to
3017  * @param cls the name of the class
3018  * @param method the name of the method
3019  * @param in_buf buffer holding the input for the method
3020  * @param in_len length of in_buf in bytes
3021  * @param out_buf where to put librados-allocated output buffer
3022  * @param out_len where to store the length of out_buf in bytes
3023  * @param prval where to store the return value from the method
3024  */
3025 void rados_read_op_exec (
3026     rados_read_op_t read_op,
3027     const(char)* cls,
3028     const(char)* method,
3029     const(char)* in_buf,
3030     size_t in_len,
3031     char** out_buf,
3032     size_t* out_len,
3033     int* prval);
3034 
3035 /**
3036  * Add an action that executes an OSD class method on an object, with the
      * output written into a caller-provided buffer.
3037  * See rados_exec() for general description.
3038  *
3039  * If the output buffer is too small, prval will
3040  * be set to -ERANGE and used_len will be 0.
3041  *
3042  * @param read_op operation to add this action to
3043  * @param cls the name of the class
3044  * @param method the name of the method
3045  * @param in_buf buffer holding the input for the method
3046  * @param in_len length of in_buf in bytes
3047  * @param out_buf user-provided buffer to read into
3048  * @param out_len length of out_buf in bytes
3049  * @param used_len where to store the number of bytes read into out_buf
3050  * @param prval where to store the return value from the method
3051  */
3052 void rados_read_op_exec_user_buf (
3053     rados_read_op_t read_op,
3054     const(char)* cls,
3055     const(char)* method,
3056     const(char)* in_buf,
3057     size_t in_len,
3058     char* out_buf,
3059     size_t out_len,
3060     size_t* used_len,
3061     int* prval);
3062 
3063 /**
3064  * Add an action that starts iterating over key/value pairs on an object.
3065  *
3066  * They will be returned sorted by key.
3067  *
3068  * @param read_op operation to add this action to
3069  * @param start_after list keys starting after start_after
3070  * @param filter_prefix list only keys beginning with filter_prefix
3071  * @param max_return list no more than max_return key/value pairs
3072  * @param iter where to store the omap iterator
3073  * @param prval where to store the return value from this action
3074  */
3075 void rados_read_op_omap_get_vals (
3076     rados_read_op_t read_op,
3077     const(char)* start_after,
3078     const(char)* filter_prefix,
3079     ulong max_return,
3080     rados_omap_iter_t* iter,
3081     int* prval);
3082 
3083 /**
3084  * Add an action that starts iterating over the keys on an object.
3085  *
3086  * They will be returned sorted by key, and the iterator
3087  * will fill in NULL for all values if specified.
3088  *
3089  * @param read_op operation to add this action to
3090  * @param start_after list keys starting after start_after
3091  * @param max_return list no more than max_return keys
3092  * @param iter where to store the omap iterator
3093  * @param prval where to store the return value from this action
3094  */
3095 void rados_read_op_omap_get_keys (
3096     rados_read_op_t read_op,
3097     const(char)* start_after,
3098     ulong max_return,
3099     rados_omap_iter_t* iter,
3100     int* prval);
3101 
3102 /**
3103  * Add an action that starts iterating over specific key/value pairs
3104  *
3105  * They will be returned sorted by key.
3106  *
3107  * @param read_op operation to add this action to
3108  * @param keys array of pointers to null-terminated keys to get
3109  * @param keys_len the number of strings in keys
3110  * @param iter where to store the omap iterator
3111  * @param prval where to store the return value from this action
3112  */
3113 void rados_read_op_omap_get_vals_by_keys (
3114     rados_read_op_t read_op,
3115     const(char*)* keys,
3116     size_t keys_len,
3117     rados_omap_iter_t* iter,
3118     int* prval);
3119 
3120 /**
3121  * Perform a previously built read operation synchronously
3122  * @param read_op operation to perform
3123  * @param io the ioctx that the object is in
3124  * @param oid the object id
3125  * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3126  */
3127 int rados_read_op_operate (
3128     rados_read_op_t read_op,
3129     rados_ioctx_t io,
3130     const(char)* oid,
3131     int flags);
3132 
3133 /**
3134  * Perform a previously built read operation asynchronously
3135  * @param read_op operation to perform
3136  * @param io the ioctx that the object is in
3137  * @param completion what to do when operation has been attempted
3138  * @param oid the object id
3139  * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3140  */
3141 int rados_aio_read_op_operate (
3142     rados_read_op_t read_op,
3143     rados_ioctx_t io,
3144     rados_completion_t completion,
3145     const(char)* oid,
3146     int flags);
3147 
3148 /** @} Object Operations */
3149 
3150 /**
3151  * Take an exclusive lock on an object.
3152  *
3153  * @param io the context to operate in
3154  * @param oid the name of the object
3155  * @param name the name of the lock
3156  * @param cookie user-defined identifier for this instance of the lock
3157  * @param desc user-defined lock description
3158  * @param duration the duration of the lock (pointer to a timeval). Set to
      *   NULL for infinite duration.
3159  * @param flags lock flags
3160  * @returns 0 on success, negative error code on failure
3161  * @returns -EBUSY if the lock is already held by another (client, cookie) pair
3162  * @returns -EEXIST if the lock is already held by the same (client, cookie) pair
3163  */
3164 int rados_lock_exclusive (
3165     rados_ioctx_t io,
3166     const(char)* oid,
3167     const(char)* name,
3168     const(char)* cookie,
3169     const(char)* desc,
3170     timeval* duration,
3171     ubyte flags);
3172 
3173 /**
3174  * Take a shared lock on an object.
3175  *
3176  * @param io the context to operate in
3177  * @param o the name of the object
3178  * @param name the name of the lock
3179  * @param cookie user-defined identifier for this instance of the lock
3180  * @param tag the tag of the lock
3181  * @param desc user-defined lock description
3182  * @param duration the duration of the lock (pointer to a timeval). Set to
      *   NULL for infinite duration.
3183  * @param flags lock flags
3184  * @returns 0 on success, negative error code on failure
3185  * @returns -EBUSY if the lock is already held by another (client, cookie) pair
3186  * @returns -EEXIST if the lock is already held by the same (client, cookie) pair
3187  */
3188 int rados_lock_shared (
3189     rados_ioctx_t io,
3190     const(char)* o,
3191     const(char)* name,
3192     const(char)* cookie,
3193     const(char)* tag,
3194     const(char)* desc,
3195     timeval* duration,
3196     ubyte flags);
3197 
3198 /**
3199  * Release a shared or exclusive lock on an object.
3200  *
3201  * @param io the context to operate in
3202  * @param o the name of the object
3203  * @param name the name of the lock
3204  * @param cookie user-defined identifier for the instance of the lock
      *   to release
3205  * @returns 0 on success, negative error code on failure
3206  * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair
3207  */
3208 int rados_unlock (
3209     rados_ioctx_t io,
3210     const(char)* o,
3211     const(char)* name,
3212     const(char)* cookie);
3213 
3214 /**
3215  * List clients that have locked the named object lock and information about
3216  * the lock.
3217  *
3218  * The number of bytes required in each buffer is put in the
3219  * corresponding size out parameter. If any of the provided buffers
3220  * are too short, -ERANGE is returned after these sizes are filled in.
3221  *
3222  * @param io the context to operate in
3223  * @param o the name of the object
3224  * @param name the name of the lock
3225  * @param exclusive where to store whether the lock is exclusive (1) or shared (0)
3226  * @param tag where to store the tag associated with the object lock
3227  * @param tag_len number of bytes in tag buffer
3228  * @param clients buffer in which locker clients are stored, separated by '\0'
3229  * @param clients_len number of bytes in the clients buffer
3230  * @param cookies buffer in which locker cookies are stored, separated by '\0'
3231  * @param cookies_len number of bytes in the cookies buffer
3232  * @param addrs buffer in which locker addresses are stored, separated by '\0'
3233  * @param addrs_len number of bytes in the addrs buffer
3234  * @returns number of lockers on success, negative error code on failure
3235  * @returns -ERANGE if any of the buffers are too short
3236  */
3237 ssize_t rados_list_lockers (
3238     rados_ioctx_t io,
3239     const(char)* o,
3240     const(char)* name,
3241     int* exclusive,
3242     char* tag,
3243     size_t* tag_len,
3244     char* clients,
3245     size_t* clients_len,
3246     char* cookies,
3247     size_t* cookies_len,
3248     char* addrs,
3249     size_t* addrs_len);
3250 
3251 /**
3252  * Release a shared or exclusive lock on an object that was taken by the
3253  * specified client.
3254  *
3255  * @param io the context to operate in
3256  * @param o the name of the object
3257  * @param name the name of the lock
3258  * @param client the client currently holding the lock
3259  * @param cookie user-defined identifier for the instance of the lock
3260  * @returns 0 on success, negative error code on failure
3261  * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair
3262  * @returns -EINVAL if the client cannot be parsed
3263  */
3264 int rados_break_lock (
3265     rados_ioctx_t io,
3266     const(char)* o,
3267     const(char)* name,
3268     const(char)* client,
3269     const(char)* cookie);
3270 
3271 /**
3272  * Blacklist the specified client from the OSDs
3273  *
3274  * @param cluster cluster handle
3275  * @param client_address address of the client to blacklist
3276  * @param expire_seconds number of seconds to blacklist (0 for default)
3277  * @returns 0 on success, negative error code on failure
3278  */
3279 int rados_blacklist_add (
3280     rados_t cluster,
3281     char* client_address,
3282     uint expire_seconds);
3283 
3284 /**
3285  * @name Mon/OSD/PG Commands
3286  *
3287  * These interfaces send commands relating to the monitor, OSD, or PGs.
3288  *
3289  * @{
3290  */
3291 
3292 /**
3293  * Send monitor command.
3294  *
3295  * @note Takes command string in carefully-formatted JSON; must match
3296  * defined commands, types, etc.
3297  *
3298  * The result buffers are allocated on the heap; the caller is
3299  * expected to release that memory with rados_buffer_free().  The
3300  * buffer and length pointers can all be NULL, in which case they are
3301  * not filled in.
3302  *
3303  * @param cluster cluster handle
3304  * @param cmd an array of char *'s representing the command
3305  * @param cmdlen count of valid entries in cmd
3306  * @param inbuf any bulk input data (crush map, etc.)
      * @param inbuflen presumably the length of inbuf in bytes - TODO confirm
3307  * @param outbuf double pointer to output buffer
3308  * @param outbuflen pointer to output buffer length
3309  * @param outs double pointer to status string
3310  * @param outslen pointer to status string length
3311  * @returns 0 on success, negative error code on failure
3312  */
3313 int rados_mon_command (
3314     rados_t cluster,
3315     const(char*)* cmd,
3316     size_t cmdlen,
3317     const(char)* inbuf,
3318     size_t inbuflen,
3319     char** outbuf,
3320     size_t* outbuflen,
3321     char** outs,
3322     size_t* outslen);
3323 
3324 /**
3325  * Send monitor command to a specific monitor.
3326  *
3327  * @note Takes command string in carefully-formatted JSON; must match
3328  * defined commands, types, etc.
3329  *
3330  * The result buffers are allocated on the heap; the caller is
3331  * expected to release that memory with rados_buffer_free().  The
3332  * buffer and length pointers can all be NULL, in which case they are
3333  * not filled in.
3334  *
3335  * @param cluster cluster handle
3336  * @param name target monitor's name
3337  * @param cmd an array of char *'s representing the command
3338  * @param cmdlen count of valid entries in cmd
3339  * @param inbuf any bulk input data (crush map, etc.)
      * @param inbuflen presumably the length of inbuf in bytes - TODO confirm
3340  * @param outbuf double pointer to output buffer
3341  * @param outbuflen pointer to output buffer length
3342  * @param outs double pointer to status string
3343  * @param outslen pointer to status string length
3344  * @returns 0 on success, negative error code on failure
3345  */
3346 int rados_mon_command_target (
3347     rados_t cluster,
3348     const(char)* name,
3349     const(char*)* cmd,
3350     size_t cmdlen,
3351     const(char)* inbuf,
3352     size_t inbuflen,
3353     char** outbuf,
3354     size_t* outbuflen,
3355     char** outs,
3356     size_t* outslen);
3357 
3358 /**
3359  * Free a rados-allocated buffer
3360  *
3361  * Release memory allocated by librados calls like rados_mon_command().
3362  *
3363  * @param buf pointer to the buffer to release
3364  */
3365 void rados_buffer_free (char* buf);
3366 
/**
 * Send a command to an OSD.
 *
 * NOTE(review): this declaration carries no documentation of its own in
 * this file. Its parameter list is that of rados_mon_command() with osdid
 * added, so the remaining parameters presumably have the same meaning -
 * confirm against librados.h.
 *
 * @param cluster cluster handle
 * @param osdid presumably the id of the target OSD - TODO confirm
 */
3367 int rados_osd_command (
3368     rados_t cluster,
3369     int osdid,
3370     const(char*)* cmd,
3371     size_t cmdlen,
3372     const(char)* inbuf,
3373     size_t inbuflen,
3374     char** outbuf,
3375     size_t* outbuflen,
3376     char** outs,
3377     size_t* outslen);
3378 
/**
 * Send a command relating to a PG.
 *
 * NOTE(review): this declaration carries no documentation of its own in
 * this file. Its parameter list is that of rados_mon_command() with pgstr
 * added, so the remaining parameters presumably have the same meaning -
 * confirm against librados.h.
 *
 * @param cluster cluster handle
 * @param pgstr presumably a string identifying the target PG - TODO confirm
 */
3379 int rados_pg_command (
3380     rados_t cluster,
3381     const(char)* pgstr,
3382     const(char*)* cmd,
3383     size_t cmdlen,
3384     const(char)* inbuf,
3385     size_t inbuflen,
3386     char** outbuf,
3387     size_t* outbuflen,
3388     char** outs,
3389     size_t* outslen);
3390 
3391 /*
3392  * This is not a doxygen comment leadin, because doxygen breaks on
3393  * a typedef with function params and returns, and I can't figure out
3394  * how to fix it.
3395  *
3396  * Monitor cluster log
3397  *
3398  * Monitor events logged to the cluster log.  The callback gets each
3399  * log entry both as a single formatted line and with each field in a
3400  * separate arg.
3401  *
3402  * Calling with a cb argument of NULL will deregister any previously
3403  * registered callback.
3404  *
      * The @param/@returns entries below describe rados_monitor_log(), which
      * is declared right after the rados_log_callback_t alias. The alias is
      * the type of cb; it receives, in order: arg, line, who, sec, nsec, seq,
      * level, msg.
      *
3405  * @param cluster cluster handle
3406  * @param level minimum log level (debug, info, warn|warning, err|error)
3407  * @param cb callback to run for each log message. It MUST NOT block
3408  * nor call back into librados.
3409  * @param arg void argument to pass to cb
3410  *
3411  * @returns 0 on success, negative code on error
3412  */
3413 alias void function (void* arg, const(char)* line, const(char)* who, ulong sec, ulong nsec, ulong seq, const(char)* level, const(char)* msg) rados_log_callback_t;
3414 
/*
 * Register cb to be run for each cluster log message at or above the
 * minimum level; the full description (parameters, return value, and
 * deregistration by passing a NULL cb) is in the "Monitor cluster log"
 * comment that precedes the rados_log_callback_t alias.
 */
3415 int rados_monitor_log (
3416     rados_t cluster,
3417     const(char)* level,
3418     rados_log_callback_t cb,
3419     void* arg);
3420 
3421 /** @} Mon/OSD/PG commands */
3422