From 6583af4bbb3547b302963922279e0516edd9d6b4 Mon Sep 17 00:00:00 2001 From: Louis Gesbert Date: Fri, 27 Oct 2023 14:26:29 +0200 Subject: [PATCH 1/3] fix(grader): avoid errors with too many open files on parallel builds --- src/repo/learnocaml_process_exercise_repository.ml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/repo/learnocaml_process_exercise_repository.ml b/src/repo/learnocaml_process_exercise_repository.ml index 4b346f7a9..c97d5e03a 100644 --- a/src/repo/learnocaml_process_exercise_repository.ml +++ b/src/repo/learnocaml_process_exercise_repository.ml @@ -59,7 +59,7 @@ let print_grader_error exercise = function let spawn_grader dump_outputs dump_reports - ?print_result ?dirname meta exercise output_json = + ?print_result ?dirname meta ex_dir output_json = let rec sleep () = if !n_processes <= 0 then Lwt_main.yield () >>= sleep @@ -69,6 +69,7 @@ let spawn_grader in sleep () >>= fun () -> Lwt.catch (fun () -> + read_exercise ex_dir >>= fun exercise -> Grader_cli.grade ~dump_outputs ~dump_reports ~display_callback:false ?print_result ?dirname meta exercise output_json @@ -213,7 +214,8 @@ let main dest_dir = if !n_processes = 1 then Lwt_list.map_s, fun dump_outputs dump_reports ?print_result ?dirname - meta exercise json_path -> + meta ex_dir json_path -> + read_exercise ex_dir >>= fun exercise -> Grader_cli.grade ~dump_outputs ~dump_reports ~display_callback:true ?print_result ?dirname @@ -239,10 +241,8 @@ let main dest_dir = end else begin Learnocaml_precompile_exercise.precompile ~exercise_dir:ex_dir >>= fun () -> - read_exercise ex_dir - >>= fun exercise -> grade dump_outputs dump_reports - ~dirname:ex_dir (Index.find index id) exercise (Some json_path) + ~dirname:ex_dir (Index.find index id) ex_dir (Some json_path) >>= function | Ok () -> Format.printf "%-24s [OK]@." id ; @@ -256,6 +256,10 @@ let main dest_dir = Lwt.return (List.for_all ((=) true) results)) (fun exn -> let print_unknown ppf = function + | Unix.Unix_error (Unix.EMFILE, _, _) -> + Format.fprintf ppf + "Too many open files. Try reducing the number of concurrent jobs \ + (with the `-j` flag) or use `ulimit -n` with a higher value" | Failure msg -> Format.fprintf ppf "Cannot process exercises: %s" msg | exn -> Format.fprintf ppf "Cannot process exercises: %s" (Printexc.to_string exn) in Json_encoding.print_error ~print_unknown Format.err_formatter exn ; From 82d9bea4db4034857747b4da2e7b3121bd6c2b28 Mon Sep 17 00:00:00 2001 From: Louis Gesbert Date: Fri, 27 Oct 2023 19:55:43 +0200 Subject: [PATCH 2/3] feat(server): add a `--replace` option Closes #529 which seemed to be a common complaint among teachers. * `learn-ocaml serve --replace` will kill an existing server (running on the same port) just before starting * `learn-ocaml build serve` with an existing server on the same port will fail fast (before actually doing the build) * `learn-ocaml build serve --replace` is more clever: - it will do the build *in a temporary directory* - then, only if everything is ok, kill the older server - swap the files and start the new server This is all done in order to minimise downtime and be convenient for server updates. Note that this PR sits on top of #481 and should be rebased once it's merged. --- src/main/learnocaml_main.ml | 106 +++++++++++++++--- src/main/learnocaml_server_args.ml | 13 ++- src/main/learnocaml_server_args.mli | 3 +- src/main/learnocaml_server_main.ml | 11 ++ .../learnocaml_process_exercise_repository.ml | 2 +- src/server/learnocaml_server.ml | 32 ++++++ src/server/learnocaml_server.mli | 9 +- 7 files changed, 155 insertions(+), 21 deletions(-) diff --git a/src/main/learnocaml_main.ml b/src/main/learnocaml_main.ml index 4bc49bad4..c09091920 100644 --- a/src/main/learnocaml_main.ml +++ b/src/main/learnocaml_main.ml @@ -292,9 +292,15 @@ let process_html_file orig_file dest_file base_url no_secret = Lwt_io.close ofile >>= fun () -> Lwt_io.close wfile +let temp_app_dir o = + let open Filename in + concat + (dirname o.app_dir) + ((basename o.app_dir) ^ ".temp") + let main o = - Printf.printf "Learnocaml v.%s running.\n" Learnocaml_api.version; - let grade () = + Printf.printf "Learnocaml v.%s running.\n%!" Learnocaml_api.version; + let grade o = if List.mem Grade o.commands then (if List.mem Build o.commands || List.mem Serve o.commands then failwith "The 'grade' command is incompatible with 'build' and \ @@ -322,9 +328,34 @@ let main o = >|= fun i -> Some i) else Lwt.return_none in - let generate () = + let generate o = if List.mem Build o.commands then - (Printf.printf "Updating app at %s\n%!" o.app_dir; + (let get_app_dir o = + if not (List.mem Serve o.commands) then + Lwt.return o.app_dir + else if o.server.Server.replace then + let app_dir = temp_app_dir o in + (if Sys.file_exists app_dir then + (Printf.eprintf "Warning: temporary directory %s already exists\n%!" + app_dir; + Lwt.return_unit) + else if Sys.file_exists o.app_dir then + Lwt_utils.copy_tree o.app_dir app_dir + else + Lwt.return_unit) + >>= fun () -> Lwt.return app_dir + else if Learnocaml_server.check_running () <> None then + (Printf.eprintf + "Error: another server is already running on port %d \ + (consider using option `--replace`)\n%!" + !Learnocaml_server.port; + exit 10) + else Lwt.return o.app_dir + in + get_app_dir o >>= fun app_dir -> + let o = { o with app_dir } in + Learnocaml_store.static_dir := app_dir; + Printf.printf "Updating app at %s\n%!" o.app_dir; Lwt.catch (fun () -> Lwt_utils.copy_tree o.builder.Builder.contents_dir o.app_dir) (function @@ -404,8 +435,44 @@ let main o = else Lwt.return true in - let run_server () = + let run_server o = if List.mem Serve o.commands then + let () = + if o.server.Server.replace then + let running = Learnocaml_server.check_running () in + Option.iter Learnocaml_server.kill_running running; + let temp = temp_app_dir o in + let app_dir = + if Filename.is_relative o.app_dir + then Filename.concat (Sys.getcwd ()) o.app_dir + else o.app_dir + in + let bak = + let f = + Filename.temp_file + ~temp_dir:(Filename.dirname app_dir) + (Filename.basename app_dir ^ ".bak.") + "" + in + Unix.unlink f; f + in + if Sys.file_exists app_dir then Sys.rename app_dir bak; + Sys.rename temp o.app_dir; + Learnocaml_store.static_dir := app_dir; + if Sys.file_exists bak then + Lwt.dont_wait (fun () -> + Lwt.pause () >>= fun () -> + Lwt_process.exec ("rm",[|"rm";"-rf";bak|]) >>= fun r -> + if r <> Unix.WEXITED 0 then + Lwt.fail_with "Remove command failed" + else Lwt.return_unit + ) + (fun ex -> + Printf.eprintf + "Warning: while cleaning up older application \ + directory %s:\n %s\n%!" + bak (Printexc.to_string ex)) + in let native_server = Sys.executable_name ^ "-server" in if Sys.file_exists native_server then let server_args = @@ -416,30 +483,39 @@ let main o = ("--port="^string_of_int o.server.port) :: (match o.server.cert with None -> [] | Some c -> ["--cert="^c]) in - Unix.execv native_server (Array.of_list (native_server::server_args)) + Lwt.return + (`Continuation + (fun () -> + Unix.execv native_server + (Array.of_list (native_server::server_args)))) else begin Printf.printf "Starting server on port %d\n%!" !Learnocaml_server.port; if o.builder.Builder.base_url <> "" then Printf.printf "Base URL: %s\n%!" o.builder.Builder.base_url; - Learnocaml_server.launch () + Learnocaml_server.launch () >>= fun ret -> + Lwt.return (`Success ret) end else - Lwt.return true + Lwt.return (`Success true) in let ret = Lwt_main.run - (grade () >>= function - | Some i -> Lwt.return i + (grade o >>= function + | Some i -> Lwt.return (`Code i) | None -> - generate () >>= fun success -> + generate o >>= fun success -> if success then - run_server () >>= fun r -> - if r then Lwt.return 0 else Lwt.return 10 + run_server o >>= function + | `Success true -> Lwt.return (`Code 0) + | `Success false -> Lwt.return (`Code 10) + | `Continuation f -> Lwt.return (`Continuation f) else - Lwt.return 1) + Lwt.return (`Code 1)) in - exit ret + match ret with + | `Code n -> exit n + | `Continuation f -> f () let man = let open Manpage in diff --git a/src/main/learnocaml_server_args.ml b/src/main/learnocaml_server_args.ml index f089df0ae..04706025b 100644 --- a/src/main/learnocaml_server_args.ml +++ b/src/main/learnocaml_server_args.ml @@ -19,6 +19,7 @@ module type S = sig base_url: string; port: int; cert: string option; + replace: bool; } val term: string Cmdliner.Term.t -> string Cmdliner.Term.t -> t Cmdliner.Term.t @@ -51,15 +52,21 @@ module Args (SN : Section_name) = struct HTTPS is enabled." default_http_port default_https_port) + let replace = + value & flag & + info ["replace"] ~doc: + "Replace a previously running instance of the server on the same port." + type t = { sync_dir: string; base_url: string; port: int; cert: string option; + replace: bool; } let term app_dir base_url = - let apply app_dir sync_dir base_url port cert = + let apply app_dir sync_dir base_url port cert replace = Learnocaml_store.static_dir := app_dir; Learnocaml_store.sync_dir := sync_dir; let port = match port, cert with @@ -73,10 +80,10 @@ module Args (SN : Section_name) = struct | None -> None); Learnocaml_server.port := port; Learnocaml_server.base_url := base_url; - { sync_dir; base_url; port; cert } + { sync_dir; base_url; port; cert; replace } in (* warning: if you add any options here, remember to pass them through when calling the native server from learn-ocaml main *) - Term.(const apply $ app_dir $ sync_dir $ base_url $ port $ cert) + Term.(const apply $ app_dir $ sync_dir $ base_url $ port $ cert $ replace) end diff --git a/src/main/learnocaml_server_args.mli b/src/main/learnocaml_server_args.mli index a4bfe27f2..c10e3bebd 100644 --- a/src/main/learnocaml_server_args.mli +++ b/src/main/learnocaml_server_args.mli @@ -16,9 +16,10 @@ module type S = sig base_url: string; port: int; cert: string option; + replace: bool; } val term: string Cmdliner.Term.t -> string Cmdliner.Term.t -> t Cmdliner.Term.t end -module Args : functor (_ : Section_name) -> S \ No newline at end of file +module Args : functor (_ : Section_name) -> S diff --git a/src/main/learnocaml_server_main.ml b/src/main/learnocaml_server_main.ml index 00fccac88..3fd57d53b 100644 --- a/src/main/learnocaml_server_main.ml +++ b/src/main/learnocaml_server_main.ml @@ -31,6 +31,17 @@ let main o = Learnocaml_api.version o.port; if o.base_url <> "" then Printf.printf "Base URL: %s\n%!" o.base_url; + let () = + match Learnocaml_server.check_running (), o.replace with + | None, _ -> () + | Some _, false -> + Printf.eprintf "Error: another server is already running on port %d \ + (consider using option `--replace`)\n%!" + !Learnocaml_server.port; + exit 10 + | Some pid, true -> + Learnocaml_server.kill_running pid + in let rec run () = let minimum_duration = 15. in let t0 = Unix.time () in diff --git a/src/repo/learnocaml_process_exercise_repository.ml b/src/repo/learnocaml_process_exercise_repository.ml index c97d5e03a..2a4a04987 100644 --- a/src/repo/learnocaml_process_exercise_repository.ml +++ b/src/repo/learnocaml_process_exercise_repository.ml @@ -62,7 +62,7 @@ let spawn_grader ?print_result ?dirname meta ex_dir output_json = let rec sleep () = if !n_processes <= 0 then - Lwt_main.yield () >>= sleep + Lwt.pause () >>= sleep else ( decr n_processes; Lwt.return_unit ) diff --git a/src/server/learnocaml_server.ml b/src/server/learnocaml_server.ml index af1fdd830..1a4c7664f 100644 --- a/src/server/learnocaml_server.ml +++ b/src/server/learnocaml_server.ml @@ -715,3 +715,35 @@ let launch () = | e -> Printf.eprintf "Server error: %s\n%!" (Printexc.to_string e); Lwt.return false + +let check_running () = + try + let ic = Printf.ksprintf Unix.open_process_in "lsof -Qti tcp:%d -s tcp:LISTEN" !port in + let pid = match input_line ic with + | "" -> None + | s -> int_of_string_opt s + | exception End_of_file -> None + in + close_in ic; + pid + with Unix.Unix_error _ -> + Printf.eprintf "Warning: could not check for previously running instance"; + None + +let kill_running pid = + let timeout = 15 in + Unix.kill pid Sys.sigint; + Printf.eprintf "Waiting for process %d to terminate... %2d%!" pid timeout; + let rec aux tout = + Printf.eprintf "\027[2D%2d" tout; + if Printf.ksprintf Sys.command "lsof -ti tcp:%d -p %d >/dev/null" !port pid + = 0 + then + if tout <= 0 then + (prerr_endline "Error: process didn't terminate in time"; exit 10) + else + (Unix.sleep 1; + aux (tout - 1)) + in + aux timeout; + prerr_endline "\027[2Dok" diff --git a/src/server/learnocaml_server.mli b/src/server/learnocaml_server.mli index e8153306c..a1606a433 100644 --- a/src/server/learnocaml_server.mli +++ b/src/server/learnocaml_server.mli @@ -16,5 +16,12 @@ val args: (Arg.key * Arg.spec * Arg.doc) list (** Main *) -(* Returns [false] if interrupted prematurely due to an error *) +val check_running: unit -> int option +(** Returns the pid or an existing process listening on the tcp port *) + +val kill_running: int -> unit +(** Kills the given process and waits for termination (fails upon + reaching a timeout) *) + val launch: unit -> bool Lwt.t +(** Returns [false] if interrupted prematurely due to an error *) From 995a79d5f02b787981eb29bffb012d6993e3c63f Mon Sep 17 00:00:00 2001 From: Louis Gesbert Date: Mon, 30 Oct 2023 14:25:36 +0100 Subject: [PATCH 3/3] feat(grader): Show a status line on what is being built This makes it easier to identify problem when one exercise is misbehaving (see https://github.com/ocaml-sf/learn-ocaml-corpus/pull/37/commits/b6e1f61f1956173d08051dbd85c68f98de60c08d) --- .../learnocaml_process_exercise_repository.ml | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/src/repo/learnocaml_process_exercise_repository.ml b/src/repo/learnocaml_process_exercise_repository.ml index 2a4a04987..3bc70ec0a 100644 --- a/src/repo/learnocaml_process_exercise_repository.ml +++ b/src/repo/learnocaml_process_exercise_repository.ml @@ -47,6 +47,22 @@ let dump_dot exs = let n_processes = ref 1 +let grading_status, grading_status_add, grading_status_remove = + let in_progress = ref [] in + let tty = Unix.isatty Unix.stderr in + let show () = + match !in_progress with + | [] -> flush stderr + | prog -> + Printf.eprintf "Grading in progress: %s" (String.concat " " prog); + if tty then (flush stderr; prerr_string "\r\027[K") else prerr_newline () + in + show, + (fun id -> in_progress := !in_progress @ [id]; show ()), + (fun id -> + in_progress := List.filter (fun x -> not (String.equal x id)) !in_progress; + show ()) + let print_grader_error exercise = function | Ok () -> () | Error (-1) -> () @@ -59,7 +75,7 @@ let print_grader_error exercise = function let spawn_grader dump_outputs dump_reports - ?print_result ?dirname meta ex_dir output_json = + ?print_result ?dirname id meta ex_dir output_json = let rec sleep () = if !n_processes <= 0 then Lwt.pause () >>= sleep @@ -70,15 +86,18 @@ let spawn_grader sleep () >>= fun () -> Lwt.catch (fun () -> read_exercise ex_dir >>= fun exercise -> + grading_status_add id; Grader_cli.grade ~dump_outputs ~dump_reports ~display_callback:false ?print_result ?dirname meta exercise output_json >|= fun r -> + grading_status_remove id; print_grader_error exercise r; incr n_processes; r) (fun e -> incr n_processes; + grading_status_remove id; Printf.eprintf "Grader error: %s\n%!" (Printexc.to_string e); Lwt.return (Error 0)) @@ -214,7 +233,7 @@ let main dest_dir = if !n_processes = 1 then Lwt_list.map_s, fun dump_outputs dump_reports ?print_result ?dirname - meta ex_dir json_path -> + _id meta ex_dir json_path -> read_exercise ex_dir >>= fun exercise -> Grader_cli.grade ~dump_outputs ~dump_reports ~display_callback:true @@ -236,21 +255,22 @@ let main dest_dir = else Lwt.return_unit) (Lwt_unix.files_of_directory ex_dir) >>= fun () -> if not changed then begin - Format.printf "%-24s (no changes)@." id ; + Format.eprintf "%-24s (no changes)@." id ; Lwt.return_true end else begin Learnocaml_precompile_exercise.precompile ~exercise_dir:ex_dir >>= fun () -> grade dump_outputs dump_reports - ~dirname:ex_dir (Index.find index id) ex_dir (Some json_path) + ~dirname:ex_dir id (Index.find index id) ex_dir (Some json_path) >>= function | Ok () -> - Format.printf "%-24s [OK]@." id ; + Format.eprintf "%-24s [OK]@." id ; Lwt.return true | Error _ -> - Format.printf "%-24s [FAILED]@." id ; + Format.eprintf "%-24s [FAILED]@." id ; Lwt.return false - end) + end + >|= fun r -> grading_status (); r) processes_arguments end >>= fun results -> Lwt.return (List.for_all ((=) true) results))