Skip to content

Commit 2b75869

Browse files
neilbrown authored and gregkh committed
sysfs/kernfs: allow attributes to request write buffer be pre-allocated.
md/raid allows metadata management to be performed in user-space. At various times, particularly on device failure, the metadata needs to be updated before further writes can be permitted. This means that the user-space program which updates metadata must not block on writeout, and so must not allocate memory. mlockall(MCL_CURRENT|MCL_FUTURE) and pre-allocation can avoid all memory allocation issues for user-memory, but that does not help kernel memory. Several kernel objects can be pre-allocated, e.g. files opened before any writes to the array are permitted. However some kernel allocation happens in places that cannot be pre-allocated. In particular, writes to sysfs files (to tell md that it can now allow writes to the array) allocate a buffer using GFP_KERNEL. This patch allows attributes to be marked as "PREALLOC". In that case the maximal buffer is allocated when the file is opened, and then used on each write instead of allocating a new buffer. As the same buffer is now shared for all writes on the same file description, the mutex is extended to cover full use of the buffer including the copy_from_user(). The new __ATTR_PREALLOC() 'or's a new flag into the 'mode', which is inspected by sysfs_add_file_mode_ns() to determine if the file should be marked as requiring prealloc. Despite the comment, we *do* use ->seq_show together with ->prealloc in this patch. The next patch fixes that. Signed-off-by: NeilBrown <[email protected]> Reviewed-by: Tejun Heo <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 0936896 commit 2b75869

File tree

4 files changed

+71
-22
lines changed

4 files changed

+71
-22
lines changed

fs/kernfs/file.c

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
106106
const struct kernfs_ops *ops;
107107

108108
/*
109-
* @of->mutex nests outside active ref and is just to ensure that
109+
* @of->mutex nests outside active ref and is primarily to ensure that
110110
* the ops aren't called concurrently for the same open file.
111111
*/
112112
mutex_lock(&of->mutex);
@@ -194,7 +194,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
194194
return -ENOMEM;
195195

196196
/*
197-
* @of->mutex nests outside active ref and is just to ensure that
197+
* @of->mutex nests outside active ref and is primarily to ensure that
198198
* the ops aren't called concurrently for the same open file.
199199
*/
200200
mutex_lock(&of->mutex);
@@ -278,19 +278,16 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
278278
len = min_t(size_t, count, PAGE_SIZE);
279279
}
280280

281-
buf = kmalloc(len + 1, GFP_KERNEL);
281+
buf = of->prealloc_buf;
282+
if (!buf)
283+
buf = kmalloc(len + 1, GFP_KERNEL);
282284
if (!buf)
283285
return -ENOMEM;
284286

285-
if (copy_from_user(buf, user_buf, len)) {
286-
len = -EFAULT;
287-
goto out_free;
288-
}
289-
buf[len] = '\0'; /* guarantee string termination */
290-
291287
/*
292-
* @of->mutex nests outside active ref and is just to ensure that
293-
* the ops aren't called concurrently for the same open file.
288+
* @of->mutex nests outside active ref and is used both to ensure that
289+
* the ops aren't called concurrently for the same open file, and
290+
* to provide exclusive access to ->prealloc_buf (when that exists).
294291
*/
295292
mutex_lock(&of->mutex);
296293
if (!kernfs_get_active(of->kn)) {
@@ -299,19 +296,27 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
299296
goto out_free;
300297
}
301298

299+
if (copy_from_user(buf, user_buf, len)) {
300+
len = -EFAULT;
301+
goto out_unlock;
302+
}
303+
buf[len] = '\0'; /* guarantee string termination */
304+
302305
ops = kernfs_ops(of->kn);
303306
if (ops->write)
304307
len = ops->write(of, buf, len, *ppos);
305308
else
306309
len = -EINVAL;
307310

308-
kernfs_put_active(of->kn);
309-
mutex_unlock(&of->mutex);
310-
311311
if (len > 0)
312312
*ppos += len;
313+
314+
out_unlock:
315+
kernfs_put_active(of->kn);
316+
mutex_unlock(&of->mutex);
313317
out_free:
314-
kfree(buf);
318+
if (buf != of->prealloc_buf)
319+
kfree(buf);
315320
return len;
316321
}
317322

@@ -685,6 +690,14 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
685690
*/
686691
of->atomic_write_len = ops->atomic_write_len;
687692

693+
if (ops->prealloc) {
694+
int len = of->atomic_write_len ?: PAGE_SIZE;
695+
of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
696+
error = -ENOMEM;
697+
if (!of->prealloc_buf)
698+
goto err_free;
699+
}
700+
688701
/*
689702
* Always instantiate seq_file even if read access doesn't use
690703
* seq_file or is not requested. This unifies private data access
@@ -715,6 +728,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
715728
err_close:
716729
seq_release(inode, file);
717730
err_free:
731+
kfree(of->prealloc_buf);
718732
kfree(of);
719733
err_out:
720734
kernfs_put_active(kn);
@@ -728,6 +742,7 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
728742

729743
kernfs_put_open_node(kn, of);
730744
seq_release(inode, filp);
745+
kfree(of->prealloc_buf);
731746
kfree(of);
732747

733748
return 0;

fs/sysfs/file.c

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,17 @@ static const struct kernfs_ops sysfs_file_kfops_rw = {
184184
.write = sysfs_kf_write,
185185
};
186186

187+
static const struct kernfs_ops sysfs_prealloc_kfops_wo = {
188+
.write = sysfs_kf_write,
189+
.prealloc = true,
190+
};
191+
192+
static const struct kernfs_ops sysfs_prealloc_kfops_rw = {
193+
.seq_show = sysfs_kf_seq_show,
194+
.write = sysfs_kf_write,
195+
.prealloc = true,
196+
};
197+
187198
static const struct kernfs_ops sysfs_bin_kfops_ro = {
188199
.read = sysfs_kf_bin_read,
189200
};
@@ -222,13 +233,19 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
222233
kobject_name(kobj)))
223234
return -EINVAL;
224235

225-
if (sysfs_ops->show && sysfs_ops->store)
226-
ops = &sysfs_file_kfops_rw;
227-
else if (sysfs_ops->show)
236+
if (sysfs_ops->show && sysfs_ops->store) {
237+
if (mode & SYSFS_PREALLOC)
238+
ops = &sysfs_prealloc_kfops_rw;
239+
else
240+
ops = &sysfs_file_kfops_rw;
241+
} else if (sysfs_ops->show)
228242
ops = &sysfs_file_kfops_ro;
229-
else if (sysfs_ops->store)
230-
ops = &sysfs_file_kfops_wo;
231-
else
243+
else if (sysfs_ops->store) {
244+
if (mode & SYSFS_PREALLOC)
245+
ops = &sysfs_prealloc_kfops_wo;
246+
else
247+
ops = &sysfs_file_kfops_wo;
248+
} else
232249
ops = &sysfs_file_kfops_empty;
233250

234251
size = PAGE_SIZE;
@@ -253,7 +270,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
253270
if (!attr->ignore_lockdep)
254271
key = attr->key ?: (struct lock_class_key *)&attr->skey;
255272
#endif
256-
kn = __kernfs_create_file(parent, attr->name, mode, size, ops,
273+
kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops,
257274
(void *)attr, ns, true, key);
258275
if (IS_ERR(kn)) {
259276
if (PTR_ERR(kn) == -EEXIST)

include/linux/kernfs.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ struct kernfs_open_file {
179179
struct mutex mutex;
180180
int event;
181181
struct list_head list;
182+
char *prealloc_buf;
182183

183184
size_t atomic_write_len;
184185
bool mmapped;
@@ -214,6 +215,13 @@ struct kernfs_ops {
214215
* larger ones are rejected with -E2BIG.
215216
*/
216217
size_t atomic_write_len;
218+
/*
219+
* "prealloc" causes a buffer to be allocated at open for
220+
* all read/write requests. As ->seq_show uses seq_read()
221+
* which does its own allocation, it is incompatible with
222+
* ->prealloc. Provide ->read and ->write with ->prealloc.
223+
*/
224+
bool prealloc;
217225
ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes,
218226
loff_t off);
219227

include/linux/sysfs.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,22 @@ struct attribute_group {
7070
* for examples..
7171
*/
7272

73+
#define SYSFS_PREALLOC 010000
74+
7375
#define __ATTR(_name, _mode, _show, _store) { \
7476
.attr = {.name = __stringify(_name), \
7577
.mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \
7678
.show = _show, \
7779
.store = _store, \
7880
}
7981

82+
#define __ATTR_PREALLOC(_name, _mode, _show, _store) { \
83+
.attr = {.name = __stringify(_name), \
84+
.mode = SYSFS_PREALLOC | VERIFY_OCTAL_PERMISSIONS(_mode) },\
85+
.show = _show, \
86+
.store = _store, \
87+
}
88+
8089
#define __ATTR_RO(_name) { \
8190
.attr = { .name = __stringify(_name), .mode = S_IRUGO }, \
8291
.show = _name##_show, \

0 commit comments

Comments
 (0)