Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions layouts/bibliography/list.html
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,7 @@ <h1 class="bib-title">{{ or .Params.heading .Title }}</h1>
{{- $rendered = $rendered | htmlEscape -}}
<a href="{{ .RelPermalink }}"><span><strong>{{- $rendered | safeHTML -}}</strong></span></a><br>

{{- /* Safe date: use front matter date only if non-empty & matches basic pattern */ -}}
{{- $authors := .Params.authors -}}
{{- $authors := .Params.authors -}}
{{- $editors := .Params.editors -}}
{{- $isPatent := eq .Params.item_type "patent" -}}
{{- /* warnf "authors: %v" $authors */ -}}
Expand Down Expand Up @@ -91,17 +90,19 @@ <h1 class="bib-title">{{ or .Params.heading .Title }}</h1>
<br>
{{- end -}}

{{- /* Safe date: start from the page's .Date and override it with the front matter
       date only when that value is non-empty and begins with YYYY-MM-DD. */ -}}
{{- $d := .Date -}}
{{- /* Human-readable date string taken from front matter; may be missing or empty. */ -}}
{{- $fmtDate := .Params.readabledate -}}
{{- with .Params.date -}}
{{- $isoDate := trim . " " -}}
{{- if and (ne $isoDate "") (findRE `^\d{4}-\d{2}-\d{2}` $isoDate) -}}
{{- $d = time $isoDate -}}
{{- end -}}
{{- end }}
{{- $d = $d.Format "2006-01-02" -}}
{{- /* Don't display bogus date. Show the readable date when present, otherwise fall
       back to the ISO date so the <time> element is never rendered empty. */ -}}
{{- if (ne $d "0001-01-01") }}
<time datetime="{{ $d }}">{{ or $fmtDate $d }}</time>
{{ end -}}

{{ with .Params.abstract -}}
Expand All @@ -114,7 +115,7 @@ <h1 class="bib-title">{{ or .Params.heading .Title }}</h1>
{{- $previewWords := split $plain " " | first $previewWordLimit -}}
{{- $preview = delimit $previewWords " " -}}
{{- else -}}
{{- $preview = replace . "\n" "<br>" -}}
{{- $preview = replace (. | htmlEscape) "\n" "<br>" -}}
{{- end -}}
<br>
<div>{{ $preview | safeHTML }}{{ if $previewing }}…{{ end }}</div>
Expand Down
11 changes: 6 additions & 5 deletions layouts/bibliography/single.html
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,17 @@ <h1>{{ .Title }}</h1>

<p>
{{- /* Start from the page's .Date and override it with the front matter date only
       when that value is non-empty and begins with YYYY-MM-DD. */ -}}
{{- $d := .Date -}}
{{- /* Human-readable date string taken from front matter; may be missing or empty. */ -}}
{{- $fmtDate := .Params.readabledate -}}
{{- with .Params.date -}}
{{- $isoDate := trim . " " -}}
{{- if and (ne $isoDate "") (findRE `^\d{4}-\d{2}-\d{2}` $isoDate) -}}
{{- $d = time $isoDate -}}
{{- end -}}
{{- end }}
{{- $d = $d.Format "2006-01-02" -}}
{{- /* Don't display bogus date. Show the readable date when present, otherwise fall
       back to the ISO date so the <time> element is never rendered empty. */ -}}
{{- if (ne $d "0001-01-01") }}
<time datetime="{{ $d }}">{{ or $fmtDate $d }}</time>
{{ end -}}
</p>
<p>
Expand All @@ -56,7 +57,7 @@ <h1>{{ .Title }}</h1>
<strong>Abstract</strong>
<br>
{{ with .Params.abstract }}
{{ . | markdownify }}
{{ . | htmlEscape | markdownify }}
{{ else }}
No abstract available.
{{ end }}
Expand Down
8 changes: 8 additions & 0 deletions scripts/bib-fns.jq
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ def issued_iso_string:
.
end;

# Adds a "readableDateString" key to the input object, built from the first
# entry of .issued["date-parts"]: every part is passed through pad2 and the
# results are joined with "-".
# The input is returned unchanged when the nonBlankKey checks on "issued" or on
# its "date-parts" do not pass (nonBlankKey and pad2 are defined elsewhere).
def issued_date_readable:
  if (nonBlankKey("issued") | not) or (.issued | nonBlankKey("date-parts") | not) then
    .
  else
    (.issued["date-parts"][0] | map(pad2) | join("-")) as $readable
    | setpath(["readableDateString"]; $readable)
  end;

def format_person_name:
if (has("family") and .family != null and (.family|tostring|length)>0) then
.family
Expand Down
58 changes: 38 additions & 20 deletions scripts/bibSplit.pl
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,26 @@
use JSON::PP qw(decode_json encode_json);
use Encode qw(decode encode is_utf8);
use Unicode::Normalize qw(NFC);
use utf8;
# Capture the bibliography directories from the environment once, at compile time.
BEGIN
{
    # Hash slice over %ENV: each variable is left undef when its
    # environment variable is not set.
    ($bibDir, $bibItemsDir) = @ENV{qw(BIBLIOGRAPHY_DIR BIBITEMS_DIR)};
}

# Handy when using the perl debugger...
# sub is8 {
# my ($s) = @_;
# return is_utf8($s) ? "is UTF8" : "is not UTF8";
# }

# Cleanup text fields
sub sanitize_text {
my ($s) = @_;
return '' unless defined $s;

# Ensure decoded characters
# NOTE: It SEEMS is_utf8() checking may not correctly indicate the correct state of the content!!
$s = decode('UTF-8', $s, Encode::WARN) unless is_utf8($s);

# Repair mojibake (e.g., "â€™" -> "’") if available
Expand All @@ -19,32 +32,28 @@ sub sanitize_text {

# Normalize and clean
$s = NFC($s); # normalize accents/combining marks
$s =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x{80}-\x{9F}]//g; # drop C0/C1 controls
$s =~ s/\x{00A0}/ /g; # NBSP -> space
$s =~ s/\r\n?/ /g; # normalize newlines
$s =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x{80}-\x{9F}]//gu; # drop C0/C1 controls
$s =~ s/\x{00A0}/ /gu; # NBSP -> space
$s =~ s/\r\n?/ /gu; # normalize newlines

return $s;
}

BEGIN
{
$bibDir = $ENV{'BIBLIOGRAPHY_DIR'};
$bibItemsDir = $ENV{'BIBITEMS_DIR'};
}
my $item = $_;
my $json = eval { decode_json($item) } or do { warn "Bad JSON line: $_\n"; next; };
my $obj = eval { decode_json($item) } or do { warn "Bad JSON line: $_\n"; next; };

my $key = $json->{key};
my $target = $json->{target} || print STDERR "Cannot find target for key \"$key\" in line: $_\n";
my $key = $obj->{key};
my $target = $obj->{target} || print STDERR "Cannot find target for key \"$key\" in line: $_\n";

if ($key eq $target) { # only top level entries
#my $handle = undef;
#my $itemjson = "$bibItemsDir/$key.json";
#open($handle, ">:encoding(UTF-8)", $itemjson) || die "$0: cannot open $itemjson in write-open mode: $!";
#print $handle $item;
#print $handle (is_utf8($item) ? $item : decode('UTF-8', $item, Encode::WARN));
#close $handle || die "$0: close of file $itemjson failed: $!";

my $obj = eval { decode_json($item) } or do { warn "Bad JSON for $key\n"; next; };
# No need to decode the $item a second time. Just changed the $json above to be $obj.
#my $obj = eval { decode_json($item) } or do { warn "Bad JSON for $key\n"; next; };
delete $obj->{children};

my $type = $obj->{type} // '';
Expand All @@ -59,34 +68,42 @@ BEGIN
my $abs = sanitize_text($obj->{abstract} // '');
# a hack for bulleted lists in the abstracts (use markdown there)
# won't work for nested lists.
$abs =~ s/\n?\n\N{U+2022}/\n*/g;
$abs =~ s/\n?\n\N{U+2022}/\n*/gu;
my $indented = join('', map { " $_\n" } split(/\n/, $abs));
my $abstract = $indented eq '' ? "abstract: ''" : "abstract: |\n$indented";

my $itemDate = defined $obj->{isoDateString} ? $obj->{isoDateString} : '';
my $itemReadableDate = defined $obj->{readableDateString} ? $obj->{readableDateString} : '';

# Build the value placed after "authors:" in the generated front matter.
# Stays '' when authorsFormatted is missing, not an array, or empty; otherwise
# it is a leading newline followed by one ` - "name"` line per author, with no
# trailing newline.
my $itemAuthors = '';
if (ref($obj->{authorsFormatted}) eq 'ARRAY' && @{$obj->{authorsFormatted}}) {
    my @entries;
    for my $author (@{$obj->{authorsFormatted}}) {
        # encode_json() is deliberately not used: it was observed to corrupt
        # extended Unicode characters, e.g. "Emanuelson, Pär".
        my $san = sanitize_text($author // '');
        # The name is wrapped in double quotes below, so backslashes and
        # double quotes inside it must be escaped to keep the YAML valid.
        $san =~ s/(["\\])/\\$1/g;
        push @entries, " - \"$san\"";
    }
    $itemAuthors = "\n" . join("\n", @entries);
}

# Build the value for the editors list in the generated front matter.
# Stays '' when editorsFormatted is missing, not an array, or empty; otherwise
# it is a leading newline followed by one ` - "name"` line per editor, with no
# trailing newline.
my $itemEditors = '';
if (ref($obj->{editorsFormatted}) eq 'ARRAY' && @{$obj->{editorsFormatted}}) {
    my @entries;
    for my $editor (@{$obj->{editorsFormatted}}) {
        # encode_json() is deliberately not used: it was observed to corrupt
        # extended Unicode characters in names.
        my $san = sanitize_text($editor // '');
        # The name is wrapped in double quotes below, so backslashes and
        # double quotes inside it must be escaped to keep the YAML valid.
        $san =~ s/(["\\])/\\$1/g;
        push @entries, " - \"$san\"";
    }
    $itemEditors = "\n" . join("\n", @entries);
}

my $urlSource = defined $obj->{url} ? $obj->{url} : '';

# Some/most/all of these *may* need sanitize_text
# optional fields - ones used vary by value of type
# Each value is wrapped in double quotes; a field that is not defined becomes "".
my $applicationNumber = defined $obj->{applicationNumber} ? qq{"$obj->{applicationNumber}"} : '""';
# Interpolate the same key that is tested. Reading "assignees" here always
# produced an empty quoted string even when "assignee" was present.
my $assignee = defined $obj->{assignee} ? qq{"$obj->{assignee}"} : '""';
Expand Down Expand Up @@ -157,6 +174,7 @@ BEGIN
---
$title
date: $itemDate
readabledate: $itemReadableDate
type: bibliography
item_type: $type
authors: $itemAuthors
Expand Down
15 changes: 9 additions & 6 deletions scripts/update_bibliography.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ function usage () {
echo -e "Usage: $0 [ options ] [ input_items_file ]\n"
echo -e "Options:"
echo -e " -h | --help \tDisplay this message and exit."
echo -e " -r | --rawitems \tSave the complete downloaded JSON as '00-rawItems.json' (See 'input_items_file' below.)."
echo -e " -r | --rawitems \tSave the complete downloaded JSON as '00-rawItems.json'"
echo -e " \t(See 'input_items_file' below.)."
echo -e " -g | --tagsfile \tGenerate 'tags.json' containing all tags on the 'cleaned-up' set of entries."
echo -e " -y | --typefiles \tGenerate item type information JSON files. (See below.)"
echo -e " -c | --collectionsfiles \tGenerate two JSON files containing info about each of the Zotero collections."
echo -e " -u | --curlfiles \tGenerate files in the 'curl/' directory with the output of each call to curl. Very low level debugging."
echo -e " -u | --curlfiles \tGenerate files in the 'curl/' directory with the output of each call to curl."
echo -e " \t(Very low level debugging.)"
echo -e " -d | --debugfiles \tGenerate numbered files with the intermediate processing step output JSON."
echo -e " -i N | --infolevel N \tSet the level of display of informational messages. N is 0-10 (Default = 2.). (See below.)"
echo -e " -i N | --infolevel N \tSet the level of display of informational messages. N is 0-10 (Default = 2.)."
echo -e " \t(See below.)"
echo -e "\n typefiles: These 3 files contain the type and itemType information for the entries with different level of details."
echo -e "\n infolevel: The infolevel controls how much detail is presented during processing."
echo -e " 0: NO info messages."
Expand Down Expand Up @@ -374,11 +377,11 @@ if $debugFiles ; then
fi
finalCount=$(jq '. | length' <<< "$items")

items=$(jq 'include "./bib-fns";map(issued_iso_string)' <<< "$items")
items=$(jq 'include "./bib-fns";map(issued_iso_string | issued_date_readable | add_author_string | add_editor_string)' <<< "$items")

items=$(jq 'include "./bib-fns";map(add_author_string)' <<< "$items")
#items=$(jq 'include "./bib-fns";map(add_author_string)' <<< "$items")

items=$(jq 'include "./bib-fns";map(add_editor_string)' <<< "$items")
#items=$(jq 'include "./bib-fns";map(add_editor_string)' <<< "$items")
# if $removeChildrenFromFinalFile; then
# # Remove .children arrays, if any. Save space.
# items=$(jq 'map(del(.children))' <<< "$items")
Expand Down