diff --git a/student_auto_feed/config.php b/student_auto_feed/config.php index 40432d7..5fddcb9 100644 --- a/student_auto_feed/config.php +++ b/student_auto_feed/config.php @@ -3,9 +3,7 @@ /* HEADING --------------------------------------------------------------------- * * config.php script used by submitty_student_auto_feed - * By Peter Bailie, Systems Programmer (RPI dept of computer science) - * - * Requires minimum PHP version 7.3 with pgsql extension. + * By Peter Bailie, Rensselaer Polytechnic Institute * * Configuration of submitty_student_auto_feed is structured through a series * of named constants. @@ -117,6 +115,7 @@ define('COLUMN_EMAIL', 4); //Student's Campus Email define('COLUMN_TERM_CODE', 11); //Semester code used in data validation define('COLUMN_REG_ID', 12); //Course and Section registration ID +define('COLUMN_CREDITS', 13); //Credits registered //Validate term code. Set to null to disable this check. define('EXPECTED_TERM_CODE', '201705'); @@ -127,6 +126,25 @@ //Set to true, if Submitty is using SAML for authentication. define('PROCESS_SAML', true); +/* RENSSELAER CENTER FOR OPEN SOURCE (RCOS) ----------------------------------- + * RCOS is not just one course, but several. Some of these courses also + * permit a student to declare their credit load. The data feed will need + * a column showing a student's credit load. See above: COLUMN_CREDITS + * + * Create only one RCOS course in Submitty, which will show up in the + * grader's/instructor's course list. The other RCOS courses must be mapped to + * this first course. Registration sections do need to be fully mapped, as the + * database does not permit mapping NULL sections. However, the upsert process + * will override how RCOS enrollments are translated, so that registration + * sections are, per student, "{course}-{credits}" e.g. J. Doe is enrolled in + * RCOS course CSCI4700 for 4 credits. 
They will be listed as enrolled in + * registration section "CSCI4700-4" + */ + +// List *ALL* RCOS courses, as an array. +// If you are not tracking RCOS, then set this as null or an empty array. +define('RCOS_COURSE_LIST', null); + /* DATA SOURCING -------------------------------------------------------------- * The Student Autofeed provides helper scripts to retrieve the CSV file for * processing. Shell script ssaf.sh is used to invoke one of the helper diff --git a/student_auto_feed/readme.md b/student_auto_feed/readme.md index 720714d..076207f 100644 --- a/student_auto_feed/readme.md +++ b/student_auto_feed/readme.md @@ -10,9 +10,9 @@ policies and practices.__ Detailed instructions can be found at [http://submitty.org/sysadmin/student\_auto\_feed](http://submitty.org/sysadmin/student_auto_feed) -Requirements: PHP 7.3 or higher with pgsql extension. `imap_remote.php` also -requires the imap extension. This system is intended to be platform agnostic, -but has been developed and tested with Ubuntu Linux. +Requires the pgsql extension. `imap_remote.php` also requires the imap extension. +This system is intended to be platform agnostic, but has been developed and tested +with Ubuntu Linux. ## submitty\_student\_auto\_feed.php A command line executable script to read a student enrollment data CSV file and diff --git a/student_auto_feed/ssaf_rcos.php b/student_auto_feed/ssaf_rcos.php new file mode 100644 index 0000000..fbe122b --- /dev/null +++ b/student_auto_feed/ssaf_rcos.php @@ -0,0 +1,30 @@ +course_list = RCOS_COURSE_LIST ?? []; + array_walk($this->course_list, function(&$v, $i) { $v = strtolower($v); }); + sort($this->course_list, SORT_STRING); + } + + /** Adjusts `$row[COLUMN_SECTION]` when `$course` is an RCOS course. 
*/ + public function map(string $course, array &$row): void { + if (in_array($course, $this->course_list, true)) { + $course = strtoupper($course); + $row[COLUMN_SECTION] = "{$course}-{$row[COLUMN_CREDITS]}"; + } + } +} diff --git a/student_auto_feed/ssaf_validate.php b/student_auto_feed/ssaf_validate.php index 80b0c0b..f2dcaa1 100644 --- a/student_auto_feed/ssaf_validate.php +++ b/student_auto_feed/ssaf_validate.php @@ -74,48 +74,6 @@ public static function validate_row($row, $row_num) : bool { return true; } - /** - * Check $rows for duplicate user IDs. - * - * Submitty's master DB does not permit students to register more than once - * for any course. It would trigger a key violation exception. This - * function checks for data anomalies where a student shows up in a course - * more than once as that is indicative of an issue with CSV file data. - * Returns TRUE, as in no error, when $rows has all unique user IDs. - * False, as in error found, otherwise. $user_ids is filled when return - * is FALSE. - * - * @param array $rows Data rows to check (presumably an entire couse). - * @param string[] &$user_id Duplicated user ID, when found. - * @param string[] &$d_rows Rows containing duplicate user IDs, indexed by user ID. - * @return bool TRUE when all user IDs are unique, FALSE otherwise. 
- */ - public static function check_for_duplicate_user_ids(array $rows, &$user_ids, &$d_rows) : bool { - usort($rows, function($a, $b) { return $a[COLUMN_USER_ID] <=> $b[COLUMN_USER_ID]; }); - - $user_ids = []; - $d_rows = []; - $are_all_unique = true; // Unless proven FALSE - $length = count($rows); - for ($i = 1; $i < $length; $i++) { - $j = $i - 1; - if ($rows[$i][COLUMN_USER_ID] === $rows[$j][COLUMN_USER_ID]) { - $are_all_unique = false; - $user_id = $rows[$i][COLUMN_USER_ID]; - $user_ids[] = $user_id; - $d_rows[$user_id][] = $j; - $d_rows[$user_id][] = $i; - } - } - - foreach($d_rows as &$d_row) { - array_unique($d_row, SORT_REGULAR); - } - unset($d_row); - - return $are_all_unique; - } - /** * Validate that there isn't an excessive drop ratio in course enrollments. * diff --git a/student_auto_feed/submitty_student_auto_feed.php b/student_auto_feed/submitty_student_auto_feed.php index 9f1b9f2..cf726ba 100755 --- a/student_auto_feed/submitty_student_auto_feed.php +++ b/student_auto_feed/submitty_student_auto_feed.php @@ -5,7 +5,7 @@ * * This script will read a student enrollment CSV feed provided by the campus * registrar or data warehouse and "upsert" (insert/update) the feed into - * Submitty's course databases. Requires PHP 7.3 and pgsql extension. + * Submitty's course databases. Requires pgsql extension. * * @author Peter Bailie, Rensselaer Polytechnic Institute */ @@ -15,6 +15,7 @@ require __DIR__ . "/ssaf_cli.php"; require __DIR__ . "/ssaf_db.php"; require __DIR__ . "/ssaf_validate.php"; +require __DIR__ . 
"/ssaf_rcos.php"; // Important: Make sure we are running from CLI if (php_sapi_name() !== "cli") { @@ -27,22 +28,24 @@ /** primary process class */ class submitty_student_auto_feed { - /** @var resource File handle to read CSV */ + /** File handle to read CSV */ private $fh; - /** @var string Semester code */ - private $semester; - /** @var array List of courses registered in Submitty */ - private $course_list; - /** @var array Describes how courses are mapped from one to another */ - private $mapped_courses; - /** @var array Describes courses/sections that are duplicated to other courses/sections */ - private $crn_copymap; - /** @var array Courses with invalid data. */ - private $invalid_courses; - /** @var array All CSV data to be upserted */ - private $data; - /** @var string Ongoing string of messages to write to logfile */ - private $log_msg_queue; + /** Semester code */ + private string $semester; + /** List of courses registered in Submitty */ + private array $course_list; + /** Describes how courses are mapped from one to another */ + private array $mapped_courses; + /** Describes courses/sections that are duplicated to other courses/sections */ + private array $crn_copymap; + /** Courses with invalid data. */ + private array $invalid_courses; + /** All CSV data to be upserted */ + private array $data; + /** Ongoing string of messages to write to logfile */ + private string $log_msg_queue; + /** For special cases involving Rensselaer Center for Open Source */ + private object $rcos; /** Init properties. Open DB connection. Open CSV file. */ public function __construct() { @@ -100,6 +103,9 @@ public function __construct() { // Get CRN shared courses/sections (when a course/section is copied to another course/section) $this->crn_copymap = $this->read_crn_copymap(); + // Helper object for special-cases involving RCOS. + $this->rcos = new rcos(); + // Init other properties. 
$this->invalid_courses = []; $this->data = []; @@ -135,8 +141,8 @@ public function go() { case $this->check_for_excessive_dropped_users(): // This check will block all upserts when an error is detected. exit(1); - case $this->check_for_duplicate_user_ids(): - $this->log_it("Duplicate user IDs detected in CSV file."); + case $this->filter_duplicate_registrations(): + // Never returns false. Error messages are already in log queue. break; case $this->invalidate_courses(): // Should do nothing when $this->invalid_courses is empty @@ -185,15 +191,15 @@ private function get_csv_data() { // Read and assign csv rows into $this->data array $row = fgetcsv($this->fh, 0, CSV_DELIM_CHAR); while(!feof($this->fh)) { - // Course is comprised of an alphabetic prefix and a numeric suffix. - $course = strtolower($row[COLUMN_COURSE_PREFIX] . $row[COLUMN_COURSE_NUMBER]); - // Trim whitespace from all fields in $row. array_walk($row, function(&$val, $key) { $val = trim($val); }); // Remove any leading zeroes from "integer" registration sections. if (ctype_digit($row[COLUMN_SECTION])) $row[COLUMN_SECTION] = ltrim($row[COLUMN_SECTION], "0"); + // Course is comprised of an alphabetic prefix and a numeric suffix. + $course = strtolower($row[COLUMN_COURSE_PREFIX] . $row[COLUMN_COURSE_NUMBER]); + switch(true) { // Check that $row has an appropriate student registration. case array_search($row[COLUMN_REGISTRATION], $all_valid_reg_codes) === false: @@ -212,6 +218,9 @@ private function get_csv_data() { // Check that $row is associated with the course list. case array_search($course, $this->course_list) !== false: if (validate::validate_row($row, $row_num)) { + // Check (and perform) special-case RCOS registration section mapping. 
+ $this->rcos->map($course, $row); + // Include $row $this->data[$course][] = $row; @@ -233,8 +242,13 @@ private function get_csv_data() { if (array_key_exists($section, $this->mapped_courses[$course])) { $m_course = $this->mapped_courses[$course][$section]['mapped_course']; if (validate::validate_row($row, $row_num)) { - // Include $row. + // Do course mapping (alters registration section). $row[COLUMN_SECTION] = $this->mapped_courses[$course][$section]['mapped_section']; + + // Check (and override) for special-case RCOS registration section mapping. + $this->rcos->map($course, $row); + + // Include $row. $this->data[$m_course][] = $row; // $row with a blank email is allowed, but it is also logged. @@ -285,31 +299,31 @@ private function get_csv_data() { } /** - * Users cannot be registered to the same course multiple times. + * Students cannot be registered to the same course multiple times. * - * Any course with a user registered more than once is flagged invalid as - * it is indicative of data errors from the CSV file. - * - * @return bool always TRUE + * If multiple registrations for the same student and course are found, the first instance is allowed to be + * upserted to the database. All other instances are removed from the data set and therefore not upserted. */ - private function check_for_duplicate_user_ids() { - foreach($this->data as $course => $rows) { - $user_ids = null; - $d_rows = null; - // Returns FALSE (as in there is an error) when duplicate IDs are found. - // However, a duplicate ID does not invalidate a course. Instead, the - // first enrollment is accepted, the other enrollments are discarded, - // and the event is logged. 
- if (validate::check_for_duplicate_user_ids($rows, $user_ids, $d_rows) === false) { - foreach($d_rows as $user_id => $userid_rows) { - $length = count($userid_rows); - for ($i = 1; $i < $length; $i++) { - unset($this->data[$course][$userid_rows[$i]]); - } + private function filter_duplicate_registrations(): true { + foreach($this->data as $course => &$rows) { + usort($rows, function($a, $b) { return $a[COLUMN_USER_ID] <=> $b[COLUMN_USER_ID]; }); + $duplicated_ids = []; + $num_rows = count($rows); + + // We are iterating from bottom to top through a course's data set. Should we find a duplicate registration + // and unset it from the array, (1) we are unsetting duplicates starting from the bottom, (2) which preserves + // the first entry among duplicate entries, and (3) we do not make a comparison with a null key. + for ($j = $num_rows - 1, $i = $j - 1; $i >= 0; $i--, $j--) { + if ($rows[$i][COLUMN_USER_ID] === $rows[$j][COLUMN_USER_ID]) { + $duplicated_ids[] = $rows[$j][COLUMN_USER_ID]; + unset($rows[$j]); } + } + if (count($duplicated_ids) > 0) { + array_unique($duplicated_ids, SORT_STRING); $msg = "Duplicate user IDs detected in {$course} data: "; - $msg .= implode(", ", $user_ids); + $msg .= implode(", ", $duplicated_ids); $this->log_it($msg); } }