diff --git a/layouts/bibliography/list.html b/layouts/bibliography/list.html
index 46abbca3..0cc8e81b 100644
--- a/layouts/bibliography/list.html
+++ b/layouts/bibliography/list.html
@@ -62,8 +62,7 @@
{{ or .Params.heading .Title }}
{{- $rendered = $rendered | htmlEscape -}}
{{- $rendered | safeHTML -}}
- {{- /* Safe date: use front matter date only if non-empty & matches basic pattern */ -}}
- {{- $authors := .Params.authors -}}
+ {{- $authors := .Params.authors -}}
{{- $editors := .Params.editors -}}
{{- $isPatent := eq .Params.item_type "patent" -}}
{{- /* warnf "authors: %v" $authors */ -}}
@@ -91,17 +90,19 @@ {{ or .Params.heading .Title }}
{{- end -}}
+ {{- /* Safe date: use front matter date only if non-empty & matches basic pattern */ -}}
{{- $d := .Date -}}
+ {{- $fmtDate := .Params.readabledate -}}
{{- with .Params.date -}}
- {{- $datestr := trim . " " -}}
- {{- if and (ne $datestr "") (findRE `^\d{4}-\d{2}-\d{2}` $datestr) -}}
- {{- $d = time $datestr -}}
+ {{- $isoDate := trim . " " -}}
+ {{- if and (ne $isoDate "") (findRE `^\d{4}-\d{2}-\d{2}` $isoDate) -}}
+ {{- $d = time $isoDate -}}
{{- end -}}
- {{- end -}}
+ {{- end }}
{{- $d = $d.Format "2006-01-02" -}}
{{- /* Don't display bogus date */ -}}
{{- if (ne $d "0001-01-01") }}
-
+
{{ end -}}
{{ with .Params.abstract -}}
@@ -114,7 +115,7 @@ {{ or .Params.heading .Title }}
{{- $previewWords := split $plain " " | first $previewWordLimit -}}
{{- $preview = delimit $previewWords " " -}}
{{- else -}}
- {{- $preview = replace . "\n" "
" -}}
+ {{- $preview = replace (. | htmlEscape) "\n" "
" -}}
{{- end -}}
{{ $preview | safeHTML }}{{ if $previewing }}…{{ end }}
diff --git a/layouts/bibliography/single.html b/layouts/bibliography/single.html
index 23e07f76..fed28338 100644
--- a/layouts/bibliography/single.html
+++ b/layouts/bibliography/single.html
@@ -32,16 +32,17 @@ {{ .Title }}
{{- $d := .Date -}}
+ {{- $fmtDate := .Params.readabledate -}}
{{- with .Params.date -}}
- {{- $datestr := trim . " " -}}
- {{- if and (ne $datestr "") (findRE `^\d{4}-\d{2}-\d{2}` $datestr) -}}
- {{- $d = time $datestr -}}
+ {{- $isoDate := trim . " " -}}
+ {{- if and (ne $isoDate "") (findRE `^\d{4}-\d{2}-\d{2}` $isoDate) -}}
+ {{- $d = time $isoDate -}}
{{- end -}}
{{- end }}
{{- $d = $d.Format "2006-01-02" -}}
{{- /* Don't display bogus date */ -}}
{{- if (ne $d "0001-01-01") }}
-
+
{{ end -}}
@@ -56,7 +57,7 @@
{{ .Title }}
Abstract
{{ with .Params.abstract }}
- {{ . | markdownify }}
+ {{ . | htmlEscape | markdownify }}
{{ else }}
No abstract available.
{{ end }}
diff --git a/scripts/bib-fns.jq b/scripts/bib-fns.jq
index 4fc0151b..1ba4d41d 100644
--- a/scripts/bib-fns.jq
+++ b/scripts/bib-fns.jq
@@ -30,6 +30,14 @@ def issued_iso_string:
.
end;
+def issued_date_readable:  # adds .readableDateString built from the first entry of .issued["date-parts"]
+ if nonBlankKey("issued") and (.issued | nonBlankKey("date-parts")) then
+ setpath(["readableDateString"];
+ (.issued["date-parts"][0] // []) | map(pad2) | join("-"))  # "// []": a missing first entry yields "" instead of a jq error
+ else
+ .
+ end;
+
def format_person_name:
if (has("family") and .family != null and (.family|tostring|length)>0) then
.family
diff --git a/scripts/bibSplit.pl b/scripts/bibSplit.pl
index d383c400..044a1738 100755
--- a/scripts/bibSplit.pl
+++ b/scripts/bibSplit.pl
@@ -2,6 +2,18 @@
use JSON::PP qw(decode_json encode_json);
use Encode qw(decode encode is_utf8);
use Unicode::Normalize qw(NFC);
+use utf8;
+BEGIN
+{
+ $bibDir = $ENV{'BIBLIOGRAPHY_DIR'};
+ $bibItemsDir = $ENV{'BIBITEMS_DIR'};
+}
+
+# Handy when using the perl debugger...
+# sub is8 {
+# my ($s) = @_;
+# return is_utf8($s) ? "is UTF8" : "is not UTF8";
+# }
# Cleanup text fields
sub sanitize_text {
@@ -9,6 +21,7 @@ sub sanitize_text {
return '' unless defined $s;
# Ensure decoded characters
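+ # NOTE: is_utf8() only reports Perl's internal UTF8 flag, not whether $s has already been decoded; an already-decoded string holding only Latin-1 characters can have the flag off and would then be decoded a second time here.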
$s = decode('UTF-8', $s, Encode::WARN) unless is_utf8($s);
# Repair mojibake (e.g., "â" -> "’") if available
@@ -19,32 +32,28 @@ sub sanitize_text {
# Normalize and clean
$s = NFC($s); # normalize accents/combining marks
- $s =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x{80}-\x{9F}]//g; # drop C0/C1 controls
- $s =~ s/\x{00A0}/ /g; # NBSP -> space
- $s =~ s/\r\n?/ /g; # normalize newlines
+ $s =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x{80}-\x{9F}]//gu; # drop C0/C1 controls
+ $s =~ s/\x{00A0}/ /gu; # NBSP -> space
+ $s =~ s/\r\n?/ /gu; # normalize newlines
return $s;
}
-BEGIN
-{
- $bibDir = $ENV{'BIBLIOGRAPHY_DIR'};
- $bibItemsDir = $ENV{'BIBITEMS_DIR'};
-}
my $item = $_;
-my $json = eval { decode_json($item) } or do { warn "Bad JSON line: $_\n"; next; };
+my $obj = eval { decode_json($item) } or do { warn "Bad JSON line: $_\n"; next; };
-my $key = $json->{key};
-my $target = $json->{target} || print STDERR "Cannot find target for key \"$key\" in line: $_\n";
+my $key = $obj->{key};
+my $target = $obj->{target} || print STDERR "Cannot find target for key \"$key\" in line: $_\n";
if ($key eq $target) { # only top level entries
#my $handle = undef;
#my $itemjson = "$bibItemsDir/$key.json";
#open($handle, ">:encoding(UTF-8)", $itemjson) || die "$0: cannot open $itemjson in write-open mode: $!";
- #print $handle $item;
+ #print $handle (is_utf8($item) ? $item : decode('UTF-8', $item, Encode::WARN));
#close $handle || die "$0: close of file $itemjson failed: $!";
- my $obj = eval { decode_json($item) } or do { warn "Bad JSON for $key\n"; next; };
+ # No need to decode the $item a second time. Just changed the $json above to be $obj.
+ #my $obj = eval { decode_json($item) } or do { warn "Bad JSON for $key\n"; next; };
delete $obj->{children};
my $type = $obj->{type} // '';
@@ -59,34 +68,42 @@ BEGIN
my $abs = sanitize_text($obj->{abstract} // '');
# a hack for bulleted lists in the abstracts (use markdown there)
# won't work for nested lists.
- $abs =~ s/\n?\n\N{U+2022}/\n*/g;
+ $abs =~ s/\n?\n\N{U+2022}/\n*/gu;
my $indented = join('', map { " $_\n" } split(/\n/, $abs));
my $abstract = $indented eq '' ? "abstract: ''" : "abstract: |\n$indented";
my $itemDate = defined $obj->{isoDateString} ? $obj->{isoDateString} : '';
+ my $itemReadableDate = defined $obj->{readableDateString} ? $obj->{readableDateString} : '';
my $itemAuthors = '';
if (ref($obj->{authorsFormatted}) eq 'ARRAY' && @{$obj->{authorsFormatted}}) {
$itemAuthors = "\n";
for my $a (@{$obj->{authorsFormatted}}) {
- my $quoted = encode_json($a // '');
- $itemAuthors .= " - $quoted\n";
+ # encode_json() returns UTF-8 *bytes*; appending those to the character-string
+ # front matter is presumably what corrupted names such as "Emanuelson, Pär".
+ # Encode without ->utf8 to get a character string instead. Unlike plain "\"...\""
+ # interpolation this still escapes embedded double quotes and backslashes.
+ my $quoted = JSON::PP->new->allow_nonref->encode(sanitize_text($a // ''));
+ $itemAuthors .= " - $quoted\n";
}
- $itemAuthors =~ s/\n$//; # strip trailing newline
+ $itemAuthors =~ s/\n$//u; # strip trailing newline
}
my $itemEditors = '';
if (ref($obj->{editorsFormatted}) eq 'ARRAY' && @{$obj->{editorsFormatted}}) {
$itemEditors = "\n";
for my $a (@{$obj->{editorsFormatted}}) {
- my $quoted = encode_json($a // '');
- $itemEditors .= " - $quoted\n";
+ # JSON-escape the sanitized name to a character string (no ->utf8, so no UTF-8
+ # byte output) so that quotes/backslashes in a name cannot break the YAML scalar.
+ my $quoted = JSON::PP->new->allow_nonref->encode(sanitize_text($a // ''));
+ $itemEditors .= " - $quoted\n";
}
- $itemEditors =~ s/\n$//; # strip trailing newline
+ $itemEditors =~ s/\n$//u; # strip trailing newline
}
my $urlSource = defined $obj->{url} ? $obj->{url} : '';
+ # Some/most/all of these *may* need sanitize_text
# optional fields - ones used vary by value of type
my $applicationNumber = defined $obj->{applicationNumber} ? qq{"$obj->{applicationNumber}"} : '""';
my $assignee = defined $obj->{assignee} ? qq{"$obj->{assignees}"} : '""';
@@ -157,6 +174,7 @@ BEGIN
---
$title
date: $itemDate
+readabledate: $itemReadableDate
type: bibliography
item_type: $type
authors: $itemAuthors
diff --git a/scripts/update_bibliography.sh b/scripts/update_bibliography.sh
index 8a827414..2229ae5f 100755
--- a/scripts/update_bibliography.sh
+++ b/scripts/update_bibliography.sh
@@ -7,13 +7,16 @@ function usage () {
echo -e "Usage: $0 [ options ] [ input_items_file ]\n"
echo -e "Options:"
echo -e " -h | --help \tDisplay this message and exit."
- echo -e " -r | --rawitems \tSave the complete downloaded JSON as '00-rawItems.json' (See 'input_items_file' below.)."
+ echo -e " -r | --rawitems \tSave the complete downloaded JSON as '00-rawItems.json'"
+ echo -e " \t(See 'input_items_file' below.)."
echo -e " -g | --tagsfile \tGenerate 'tags.json' containing all tags on the 'cleaned-up' set of entries."
echo -e " -y | --typefiles \tGenerate item type information JSON files. (See below.)"
echo -e " -c | --collectionsfiles \tGenerate two JSON files containing info about each of the Zotero collections."
- echo -e " -u | --curlfiles \tGenerate files in the 'curl/' directory with the output of each call to curl. Very low level debugging."
+ echo -e " -u | --curlfiles \tGenerate files in the 'curl/' directory with the output of each call to curl."
+ echo -e " \t(Very low level debugging.)"
echo -e " -d | --debugfiles \tGenerate numbered files with the intermediate processing step output JASON."
- echo -e " -i N | --infolevel N \tSet the level of display of informational messages. N is 0-10 (Default = 2.). (See below.)"
+ echo -e " -i N | --infolevel N \tSet the level of display of informational messages. N is 0-10 (Default = 2.)."
+ echo -e " \t(See below.)"
echo -e "\n typefiles: These 3 files contain the type and itemType information for the entries with different level of details."
echo -e "\n infolevel: The infolevel controls how much detail is presented during processing."
echo -e " 0: NO info messages."
@@ -374,11 +377,11 @@ if $debugFiles ; then
fi
finalCount=$(jq '. | length' <<< "$items")
-items=$(jq 'include "./bib-fns";map(issued_iso_string)' <<< "$items")
+items=$(jq 'include "./bib-fns";map(issued_iso_string | issued_date_readable | add_author_string | add_editor_string)' <<< "$items")
-items=$(jq 'include "./bib-fns";map(add_author_string)' <<< "$items")
+#items=$(jq 'include "./bib-fns";map(add_author_string)' <<< "$items")
-items=$(jq 'include "./bib-fns";map(add_editor_string)' <<< "$items")
+#items=$(jq 'include "./bib-fns";map(add_editor_string)' <<< "$items")
# if $removeChildrenFromFinalFile; then
# # Remove .children arrays, if any. Save space.
# items=$(jq 'map(del(.children))' <<< "$items")