Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion scripts/bib-fns.jq
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ def unwrapDiv:
# Move a populated "URL" field to the lowercase "url" field, then drop "URL".
# nonBlankKey is defined elsewhere in this file; presumably it is true when the
# named key is present and not blank -- confirm against its definition.
# NOTE(review): `|` binds more loosely than `//`, so the trailing `// .` only
# guards the parenthesised setpath/del expression. When select() rejects the
# input, this filter emits nothing rather than the unchanged item -- confirm
# that callers expect that.
def moveURL_to_url:
select(nonBlankKey("URL")) | (setpath(["url"]; .URL) | del(.URL)) // .;

# Build a resolvable URL from a DOI value.
#   $doi - the DOI string as stored on the item.
# Values that already are a web URL ("https:" or "http:") are returned
# unchanged, so an "http://dx.doi.org/..." entry is not nested inside another
# doi.org link. Otherwise a leading "doi:" / "DOI:" label and a leading "/" are
# removed and the remainder is appended to "https://doi.org/".
def make_DOI_to_url($doi):
  if ($doi | (startswith("https:") or startswith("http:"))) then
    $doi
  else
    "https://doi.org/" + ($doi | ltrimstr("doi:") | ltrimstr("DOI:") | ltrimstr("/"))
  end ;

def cleanAbstracts:
if blankKey("abstract") and nonBlankKey("abstractNote") then
setpath(["abstract"]; .abstractNote) | del(.abstractNote)
Expand Down Expand Up @@ -69,4 +72,21 @@ def getNeededUrls: # assumes that array of all current items is the input
.[0] as $k # get the key
| select(($keys | all(. != $k))) # if this key isn't already in the $keys
| .[1]?) # include this url in this collected array
;
;

# def intersection(x;y):
# if (y|length) == 0 then
# []
# else (x|unique) as $x
# | $x - ($x - y)
# end;

# Flatten one item into a single object.
# The item's own fields (everything except "csljson" and "data") come first,
# followed by the fields of csljson merged with data (data wins on clashes,
# csljson defaults to {} when absent). "key" and "version" from the merged
# object are left out, so the item's own values for those names are kept;
# any other merged field replaces a same-named field of the item.
def semiflatten: # assumes that only one item is the input
  # Turn a list of key names into from_entries-style pairs read from $obj.
  def entriesOf($obj; $names): $names | map({key: ., value: ($obj[.])});
  . as $item
  | ((.csljson // {}) * .data) as $merged
  | entriesOf($item; keys - ["csljson", "data"])
    + entriesOf($merged; ($merged | keys) - ["key", "version"])
  | from_entries;

184 changes: 148 additions & 36 deletions scripts/update_bibliography.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,28 @@

set -e

rawItemsFile=true
debugFiles=false
tagFiles=false
typeFiles=false
# The collection files will be created only if directly querying Zotero API.
collectionFiles=false

dfn="00"
function debugFileName() {
  # Print the next numbered debug file name: "<NN>-<name>.json".
  #
  # Arguments:
  #   $1 - the file NAME string (without the number prefix or extension)
  #   $2 - the previous value produced by this function (or the "00" seed);
  #        its leading digits, up to the first "-", are the running count.
  #        Optional: a missing or non-numeric value restarts the count at 0.
  # Outputs: the new file name on stdout, count zero-padded to two digits.
  local name=$1
  local previous=${2:-00}
  # Keep only the numeric prefix ("07-cleanDiv.json" -> "07").
  local count=${previous%%-*}
  if [[ ! $count =~ ^[0-9]+$ ]]; then
    count=0
  fi
  # 10# forces base ten so prefixes such as "08" and "09" are not rejected as
  # invalid octal. Quoting keeps a NAME containing spaces in one piece.
  printf '%02d-%s.json\n' "$(( 10#$count + 1 ))" "$name"
}

GROUP_ID=2914042

items=""
collections=""

function add_items_from_collection () {
local collection_key="$1"
Expand All @@ -20,84 +39,177 @@ function add_items_from_collection () {
[[ $(jq '. | length' <<< "$this_page") > 0 ]] || break
done

local subcollections=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/$collection_key/collections" | jq -r '.[].data|{key, name, parentCollection}' | jq -s '.')
collections=$(jq -s 'add' <<< "$subcollections$collections")

# Recurse into subcollections
while read subcollection_key; do
add_items_from_collection $subcollection_key
done < <(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/$collection_key/collections" | jq -r '.[].key')
done < <(jq -r '.[].key' <<< "$subcollections")
}

root_collection="FSK5IX4F"
if [[ $# -eq 0 ]] ; then
# Initialize with the root collection.
collections=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/top" | jq -r '.[].data|{key, name, parentCollection}' | jq -s '.' | jq "map(select(.key==\"$root_collection\"))")
add_items_from_collection $root_collection
else
items=$(<$1)
fi

# echo "$items" > 00-rawItems-pre-needed.json
if $collectionFiles ; then
echo "$collections" > collections.json
fi
echo "$(jq '. | length' <<< "$collections") collections"

while read neededUrl; do
while read neededUrl; do
item=$(curl -s "$neededUrl?include=data,csljson&v=3")
items=$(jq -s 'add' <<< "[$item]$items")
done < <(jq -r 'include "./bib-fns";getNeededUrls|.[]' <<< "$items")

done < <(jq -r 'include "./bib-fns";getNeededUrls|.[]' <<< "$items")

grouped_collections=$(jq 'group_by(.parentCollection)' <<< "$collections")
# This is not yet fully hierarchically nested.
if $collectionFiles ; then
echo "$grouped_collections" > grouped_collections.json
fi
# Only if we got the raw items from the Zotero API
if $rawItemsFile ; then
echo "$items" > 00-rawItems.json
fi
else
items=$(<$1)
fi

echo "Got $(jq '. | length' <<< "$items") items"

# Piece-wise processing for debugging:
# echo "$items" > 0-rawItems.json

items=$(jq 'map(.csljson * .data)' <<< "$items")
# echo "$items" > 1-mapped.json
items=$(jq 'include "./bib-fns";map(semiflatten)' <<< "$items")
if $debugFiles ; then
dfn=$(debugFileName "semiflattened" $dfn)
echo "$items" > "$dfn"
fi
items=$(jq 'include "./bib-fns";map(if has("note") then .note |= unwrapDiv else . end)' <<< "$items")
# echo "$items" > 2-cleanDiv.json
if $debugFiles ; then
dfn=$(debugFileName "cleanDiv" $dfn)
echo "$items" > "$dfn"
fi
#find and eliminate duplicate .key entries
groupedByKey=$(jq 'group_by(.key)' <<< "$items")
dupIDs=$(jq 'map(select(length>1) | .[0])' <<< "$groupedByKey")
# echo "$dupIDs" > dupIDs.json
if $debugFiles ; then
dupIDs=$(jq 'map(select(length>1) | {key: .[0].key, items: .})' <<< "$groupedByKey")
dfn=$(debugFileName "dupIDs" $dfn)
echo "$dupIDs" > "$dfn"
fi

#remove duplicates
items=$(jq 'map(.[0]) | sort_by(.date)' <<< "$groupedByKey")
# echo "$items" > 3-noDupSorted.json
if $debugFiles ; then
dfn=$(debugFileName "noDupSorted" $dfn)
echo "$items" > "$dfn"
fi
if $typeFiles ; then
types=$(jq 'map({key, title, type, itemType}) | group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:map({title, key})}))})' <<<"$items")
echo "$types" > titleKeyTypeInfo.json
types=$(jq 'group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:.}))})' <<<"$items")
echo "$types" > fullTypeInfo.json
fi
if $tagFiles ; then
tags=$(jq 'map(.tags) | flatten | map(.tag) | unique' <<<"$items")
echo "$tags" > tags.json
fi
# consolidate URL into url field
items=$(jq 'include "./bib-fns";map(if nonBlankKey("URL") then setpath(["url"]; .URL) | del(.URL) else . end)' <<< "$items")
# echo "$items" > consolidated.json
if $debugFiles ; then
dfn=$(debugFileName "consolidated" $dfn)
echo "$items" > "$dfn"
fi

# Use DOI to create url where needed and available
items=$(jq 'include "./bib-fns";map(if (blankKey("url") and nonBlankKey("DOI")) then setpath(["url"]; make_DOI_to_url(.DOI)) else . end)' <<< "$items")
if $debugFiles ; then
dfn=$(debugFileName "DOI-url" $dfn)
echo "$items" > "$dfn"
fi

items=$(jq 'include "./bib-fns";map(getTargetInfo)' <<< "$items")
# echo "$items" > withTargetInfo.json
if $debugFiles ; then
dfn=$(debugFileName "withTargetInfo" $dfn)
echo "$items" > "$dfn"
fi

# now use children items to amend the info for the parentItem
allFixupInfo=$(jq 'group_by(.target)|map(sort_by(.parentItem))' <<< "$items")
# echo "$allFixupInfoGrouped" > allFixupInfoGrouped.json
if $debugFiles ; then
dfn=$(debugFileName "allFixupInfo" $dfn)
echo "$allFixupInfo" > "$dfn"
fi
# Embed children into their parentItem (in a "children" field), then drop any items that Zotero has marked as deleted.
items=$(jq 'map(if has(1) then (first + {children: .[1:]}) else first end)|map(select(.deleted|not))' <<< "$allFixupInfo")
# echo "$items" > itemsNestedChildren.json
if $debugFiles ; then
dfn=$(debugFileName "itemsNestedChildren" $dfn)
echo "$items" > "$dfn"
fi

# echo "Got $(jq '. | length' <<< "$items") clean, top-level items"
echo "Got $(jq '. | length' <<< "$items") clean, top-level items"
items=$(jq 'include "./bib-fns";map(applyChildrenAmendments)' <<< "$items")
# echo "$items" > updatedItems.json

# absNote=$(jq 'include "./bib-fns";map(select((nonBlankKey("abstract") or nonBlankKey("abstractNote")) and (.abstract != .abstractNote)) | {key: .key, title: .title, abstract: .abstract, abstractNote: .abstractNote})' <<< "$items")
# echo "$absNote" > absNoteMismatch.json
if $debugFiles ; then
dfn=$(debugFileName "updatedItems" $dfn)
echo "$items" > "$dfn"
fi
absNote=$(jq 'include "./bib-fns";map(select((nonBlankKey("abstract") or nonBlankKey("abstractNote")) and (.abstract != .abstractNote)) | {key: .key, title: .title, abstract: .abstract, abstractNote: .abstractNote})' <<< "$items")
if $debugFiles ; then
dfn=$(debugFileName "absNoteMismatch" $dfn)
echo "$absNote" > "$dfn"
fi
# remove redundant .abstractNote fields
items=$(jq 'include "./bib-fns";map(cleanAbstracts)' <<< "$items")
# echo "$items" > abstractsCleaned.json
if $debugFiles ; then
dfn=$(debugFileName "abstractsCleaned" $dfn)
echo "$items" > "$dfn"
fi
if $typeFiles ; then
types=$(jq 'map({key, title, type, itemType}) | group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:map({title, key})}))})' <<<"$items")
echo "$types" > finalTitleKeyTypeInfo.json
fi
noURL=$(jq 'include "./bib-fns";map(select(blankKey("url")))' <<< "$items")
# echo "$noURL" > noURL.json
if $debugFiles ; then
dfn=$(debugFileName "noURL" $dfn)
echo "$noURL" > "$dfn"
fi
finalCount=$(jq '. | length' <<< "$items")
# Remove .children arrays, if any. Save space.
items=$(jq 'map(del(.children))' <<< "$items")
# # Remove .children arrays, if any. Save space.
# items=$(jq 'map(del(.children))' <<< "$items")
# if $debugFiles ; then
# dfn=$(debugFileName "withoutChildrenArray")
# echo "$items" > "$dfn"
# fi
# Group by year
items=$(jq 'group_by(.issued."date-parts"[0][0])' <<< "$items")
# echo "$items" > 4-grouped.json
if $debugFiles ; then
dfn=$(debugFileName "grouped" $dfn)
echo "$items" > "$dfn"
fi
items=$(jq 'map({ (.[0].issued."date-parts"[0][0] // "Undated" | tostring): . })' <<< "$items")
# echo "$items" > 5-Undated.json
if $debugFiles ; then
dfn=$(debugFileName "Undated" $dfn)
echo "$items" > "$dfn"
fi
items=$(jq 'map(to_entries)' <<< "$items")
# echo "$items" > 6-entries.json
if $debugFiles ; then
dfn=$(debugFileName "entries" $dfn)
echo "$items" > "$dfn"
fi
items=$(jq 'flatten(1)' <<< "$items")
# echo "$items" > 7-flattened.json
if $debugFiles ; then
dfn=$(debugFileName "flattened" $dfn)
echo "$items" > "$dfn"
fi
items=$(jq 'group_by(.key)' <<< "$items")
# echo "$items" > 8-groupedByKey.json
if $debugFiles ; then
dfn=$(debugFileName "groupedByKey" $dfn)
echo "$items" > "$dfn"
fi
items=$(jq 'include "./bib-fns";map(reconstituteGroupedEntries) | from_entries' <<< "$items")
# echo "$items" > 9-final.json
if $debugFiles ; then
dfn=$(debugFileName "final" $dfn)
echo "$items" > "$dfn"
fi


echo "Outputting CSL JSON"
Expand Down