33#include <asm/unistd.h>
44#include <sysdep/stub.h>
55#include <stub-data.h>
6+ #include <linux/filter.h>
7+ #include <linux/seccomp.h>
8+ #include <generated/asm-offsets.h>
69
710void _start (void );
811
@@ -25,8 +28,6 @@ noinline static void real_init(void)
2528 } sa = {
2629 /* Need to set SA_RESTORER (but the handler never returns) */
2730 .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000 ,
28- /* no need to mask any signals */
29- .sa_mask = 0 ,
3031 };
3132
3233 /* set a nice name */
@@ -35,6 +36,9 @@ noinline static void real_init(void)
3536 /* Make sure this process dies if the kernel dies */
3637 stub_syscall2 (__NR_prctl , PR_SET_PDEATHSIG , SIGKILL );
3738
39+ /* Needed in SECCOMP mode (and safe to do anyway) */
40+ stub_syscall5 (__NR_prctl , PR_SET_NO_NEW_PRIVS , 1 , 0 , 0 , 0 );
41+
3842 /* read information from STDIN and close it */
3943 res = stub_syscall3 (__NR_read , 0 ,
4044 (unsigned long )& init_data , sizeof (init_data ));
@@ -63,18 +67,133 @@ noinline static void real_init(void)
6367 stack .ss_sp = (void * )init_data .stub_start + UM_KERN_PAGE_SIZE ;
6468 stub_syscall2 (__NR_sigaltstack , (unsigned long )& stack , 0 );
6569
66- /* register SIGSEGV handler */
67- sa .sa_handler_ = (void * ) init_data .segv_handler ;
68- res = stub_syscall4 (__NR_rt_sigaction , SIGSEGV , (unsigned long )& sa , 0 ,
69- sizeof (sa .sa_mask ));
70- if (res != 0 )
71- stub_syscall1 (__NR_exit , 13 );
70+ /* register signal handlers */
71+ sa .sa_handler_ = (void * ) init_data .signal_handler ;
72+ sa .sa_restorer = (void * ) init_data .signal_restorer ;
73+ if (!init_data .seccomp ) {
74+ /* In ptrace mode, the SIGSEGV handler never returns */
75+ sa .sa_mask = 0 ;
76+
77+ res = stub_syscall4 (__NR_rt_sigaction , SIGSEGV ,
78+ (unsigned long )& sa , 0 , sizeof (sa .sa_mask ));
79+ if (res != 0 )
80+ stub_syscall1 (__NR_exit , 13 );
81+ } else {
82+ /* SECCOMP mode uses rt_sigreturn, need to mask all signals */
83+ sa .sa_mask = ~0ULL ;
84+
85+ res = stub_syscall4 (__NR_rt_sigaction , SIGSEGV ,
86+ (unsigned long )& sa , 0 , sizeof (sa .sa_mask ));
87+ if (res != 0 )
88+ stub_syscall1 (__NR_exit , 14 );
89+
90+ res = stub_syscall4 (__NR_rt_sigaction , SIGSYS ,
91+ (unsigned long )& sa , 0 , sizeof (sa .sa_mask ));
92+ if (res != 0 )
93+ stub_syscall1 (__NR_exit , 15 );
94+
95+ res = stub_syscall4 (__NR_rt_sigaction , SIGALRM ,
96+ (unsigned long )& sa , 0 , sizeof (sa .sa_mask ));
97+ if (res != 0 )
98+ stub_syscall1 (__NR_exit , 16 );
99+
100+ res = stub_syscall4 (__NR_rt_sigaction , SIGTRAP ,
101+ (unsigned long )& sa , 0 , sizeof (sa .sa_mask ));
102+ if (res != 0 )
103+ stub_syscall1 (__NR_exit , 17 );
104+
105+ res = stub_syscall4 (__NR_rt_sigaction , SIGILL ,
106+ (unsigned long )& sa , 0 , sizeof (sa .sa_mask ));
107+ if (res != 0 )
108+ stub_syscall1 (__NR_exit , 18 );
109+
110+ res = stub_syscall4 (__NR_rt_sigaction , SIGFPE ,
111+ (unsigned long )& sa , 0 , sizeof (sa .sa_mask ));
112+ if (res != 0 )
113+ stub_syscall1 (__NR_exit , 19 );
114+ }
115+
116+ /*
117+ * If in seccomp mode, install the SECCOMP filter and trigger a syscall.
118+ * Otherwise set PTRACE_TRACEME and do a SIGSTOP.
119+ */
120+ if (init_data .seccomp ) {
121+ struct sock_filter filter [] = {
122+ #if __BITS_PER_LONG > 32
123+ /* [0] Load upper 32bit of instruction pointer from seccomp_data */
124+ BPF_STMT (BPF_LD | BPF_W | BPF_ABS ,
125+ (offsetof(struct seccomp_data , instruction_pointer ) + 4 )),
126+
127+ /* [1] Jump forward 3 instructions if the upper address is not identical */
128+ BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K , (init_data .stub_start ) >> 32 , 0 , 3 ),
129+ #endif
130+ /* [2] Load lower 32bit of instruction pointer from seccomp_data */
131+ BPF_STMT (BPF_LD | BPF_W | BPF_ABS ,
132+ (offsetof(struct seccomp_data , instruction_pointer ))),
133+
134+ /* [3] Mask out lower bits */
135+ BPF_STMT (BPF_ALU | BPF_AND | BPF_K , 0xfffff000 ),
136+
137+ /* [4] Jump to [6] if the lower bits are on the expected page, else fall through to [5] */
138+ BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K , (init_data .stub_start ) & 0xfffff000 , 1 , 0 ),
139+
140+ /* [5] Trap call, allow */
141+ BPF_STMT (BPF_RET | BPF_K , SECCOMP_RET_TRAP ),
142+
143+ /* [6,7] Check architecture */
144+ BPF_STMT (BPF_LD | BPF_W | BPF_ABS ,
145+ offsetof(struct seccomp_data , arch )),
146+ BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K ,
147+ UM_SECCOMP_ARCH_NATIVE , 1 , 0 ),
148+
149+ /* [8] Kill (for architecture check) */
150+ BPF_STMT (BPF_RET | BPF_K , SECCOMP_RET_KILL_PROCESS ),
151+
152+ /* [9] Load syscall number */
153+ BPF_STMT (BPF_LD | BPF_W | BPF_ABS ,
154+ offsetof(struct seccomp_data , nr )),
155+
156+ /* [10-14] Check against permitted syscalls */
157+ BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K , __NR_futex ,
158+ 5 , 0 ),
159+ BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K , STUB_MMAP_NR ,
160+ 4 , 0 ),
161+ BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K , __NR_munmap ,
162+ 3 , 0 ),
163+ #ifdef __i386__
164+ BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K , __NR_set_thread_area ,
165+ 2 , 0 ),
166+ #else
167+ BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K , __NR_arch_prctl ,
168+ 2 , 0 ),
169+ #endif
170+ BPF_JUMP (BPF_JMP | BPF_JEQ | BPF_K , __NR_rt_sigreturn ,
171+ 1 , 0 ),
172+
173+ /* [15] Not one of the permitted syscalls */
174+ BPF_STMT (BPF_RET | BPF_K , SECCOMP_RET_KILL_PROCESS ),
175+
176+ /* [16] Permitted call for the stub */
177+ BPF_STMT (BPF_RET | BPF_K , SECCOMP_RET_ALLOW ),
178+ };
179+ struct sock_fprog prog = {
180+ .len = sizeof (filter ) / sizeof (filter [0 ]),
181+ .filter = filter ,
182+ };
183+
184+ if (stub_syscall3 (__NR_seccomp , SECCOMP_SET_MODE_FILTER ,
185+ SECCOMP_FILTER_FLAG_TSYNC ,
186+ (unsigned long )& prog ) != 0 )
187+ stub_syscall1 (__NR_exit , 20 );
72188
73- stub_syscall4 (__NR_ptrace , PTRACE_TRACEME , 0 , 0 , 0 );
189+ /* Fall through, the exit syscall will cause SIGSYS */
190+ } else {
191+ stub_syscall4 (__NR_ptrace , PTRACE_TRACEME , 0 , 0 , 0 );
74192
75- stub_syscall2 (__NR_kill , stub_syscall0 (__NR_getpid ), SIGSTOP );
193+ stub_syscall2 (__NR_kill , stub_syscall0 (__NR_getpid ), SIGSTOP );
194+ }
76195
77- stub_syscall1 (__NR_exit , 14 );
196+ stub_syscall1 (__NR_exit , 30 );
78197
79198 __builtin_unreachable ();
80199}
0 commit comments