Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions tool/ood-gen/lib/youtube.ml
Original file line number Diff line number Diff line change
Expand Up @@ -187,17 +187,33 @@ let scrape yaml_file =
in
match fetched with
| Ok fetched ->
let yaml =
let all_videos =
VideoSet.union fetched scraped
|> VideoSet.to_seq |> List.of_seq
|> List.sort (fun a b -> compare b.Vid.published a.Vid.published)
|> Vid.video_list_to_yaml
in
let yaml = Vid.video_list_to_yaml all_videos in
(* The yaml library uses a fixed-size output buffer. The default is 262140
bytes, which was exceeded when we had 203 videos (~262KB output). This
caused the document_end operation to fail with a "doc_end failed" error.

Current stats: 203 videos ≈ 260KB, average ~1.3KB per video. We use a
2MB buffer to accommodate growth to ~1500 videos before hitting limits.
If the list grows beyond that, this will fail with a clear error
message. *)
let buffer_size = 2 * 1024 * 1024 in
(* 2MB *)
let output =
Yaml.pp Format.str_formatter yaml;
Format.flush_str_formatter ()
match Yaml.to_string ~len:buffer_size yaml with
| Ok s -> s
| Error (`Msg err) ->
failwith
(Printf.sprintf
"YAML serialization failed (tried %d videos, buffer size %d \
bytes): %s"
(List.length all_videos) buffer_size err)
in
let oc = open_out yaml_file in
Printf.fprintf oc "%s" output;
output_string oc output;
close_out oc
| Error (`Msg msg) -> failwith msg
Loading