Skip to content

Commit d777590

Browse files
committed
fixed issues and extracted another helper function
1 parent cff5bad commit d777590

File tree

2 files changed

+45
-17
lines changed

2 files changed

+45
-17
lines changed

frontend/degree-plan/pages/OnboardingPage.tsx

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,10 @@ import { polyfillPromiseWithResolvers } from "./polyfilsResolver";
99

1010
import "core-js/full/promise/with-resolvers.js";
1111

12-
import { parseItems, parseTranscript } from "../utils/parseUtils";
12+
import { parseItems, parseTranscript, ParsedText, flattenParsedText } from "../utils/parseUtils";
1313
import WelcomeLayout from "@/components/OnboardingPanels/WelcomePanel";
1414
import CreateWithTranscriptPanel from "@/components/OnboardingPanels/CreateWithTranscriptPanel";
15+
1516
polyfillPromiseWithResolvers();
1617

1718
pdfjs.GlobalWorkerOptions.workerSrc = `//unpkg.com/pdfjs-dist@${pdfjs.version}/legacy/build/pdf.worker.min.mjs`;
@@ -44,24 +45,26 @@ const OnboardingPage = ({
4445
>(`/api/degree/degrees`);
4546

4647
// TRANSCRIPT PARSING
47-
const total = useRef<any>({});
48+
const total = useRef<Record<number, ParsedText[]>>({});
4849
const addText = (items: any[], index: number) => {
49-
const allText: any = parseItems(items);
50-
let textResult = [];
51-
for (let col in allText) {
52-
let poses = Object.keys(allText[col]).reverse();
53-
for (let i in poses) {
54-
textResult.push(allText[col][poses[i]].join("").toLowerCase());
55-
}
56-
total.current[index] = textResult;
57-
}
50+
const parsed = parseItems(items);
51+
total.current[index] = total.current[index] ?? [];
52+
total.current[index].push(parsed);
5853

5954
// If all pages have been read, begin to parse text from transcript
6055
if (Object.keys(total.current).length === numPages) {
61-
let all: any = [];
62-
for (let key in Object.keys(total.current).sort()) {
63-
all = all.concat(total.current[key]);
64-
}
56+
let all: string[] = [];
57+
const sortedPageIndexes = Object.keys(total.current)
58+
.map((key) => Number(key))
59+
.sort((a, b) => a - b);
60+
61+
sortedPageIndexes.forEach((pageIndex) => {
62+
const pageEntries = total.current[pageIndex];
63+
if (!pageEntries) return;
64+
pageEntries.forEach((pageText) => {
65+
all = all.concat(flattenParsedText(pageText));
66+
});
67+
});
6568

6669
const {
6770
scrapedCourses,

frontend/degree-plan/utils/parseUtils.ts

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,35 @@ type DegreeOption = {
1717
label: string;
1818
};
1919

20+
// Rows of a single transcript column, keyed by the position key under which
// parseItems stores each line.
type ParsedTextColumn = Record<string, string[]>;

// Parsed text of one page, split into the (at most) two transcript columns.
export type ParsedText = {
  col0: ParsedTextColumn;
  col1: ParsedTextColumn;
};

/**
 * Given a parsed text object, return a flattened array of strings.
 *
 * Columns are visited in the fixed order `col0`, then `col1`. Within a column,
 * each row's fragments are concatenated without a separator and lower-cased.
 *
 * Row order: when every position key is numeric, rows are emitted from the
 * highest position to the lowest. For integer keys this is exactly what
 * reversing `Object.keys` produced before; it additionally keeps fractional
 * keys (e.g. "685.5") in their correct place, which plain key enumeration does
 * not (integer-like keys are enumerated first, everything else in insertion
 * order). If any key is not numeric, the previous behaviour - reversed
 * enumeration order - is kept unchanged.
 *
 * @param parsedText - Parsed columns of a single page, as built by parseItems.
 * @returns One lower-cased string per row, first for col0 and then for col1.
 */
export const flattenParsedText = (parsedText: ParsedText): string[] => {
  const columns: (keyof ParsedText)[] = ["col0", "col1"];

  return columns.flatMap((column) => {
    // Start from the historical order (reversed enumeration) so that the
    // non-numeric fallback and ties in the stable sort below stay compatible.
    const rows = Object.entries(parsedText[column]).reverse();

    const allNumeric = rows.every(([pos]) => Number.isFinite(Number(pos)));
    if (allNumeric) {
      // Descending numeric position; `rows` is a fresh local array, so the
      // in-place sort does not touch the caller's data.
      rows.sort(([posA], [posB]) => Number(posB) - Number(posA));
    }

    return rows.map(([, fragments]) => fragments.join("").toLowerCase());
  });
};
43+
44+
2045
// Given a list of line items from the PDF, return an object of columns and the lines in each column.
2146
export const parseItems = (items: LineItem[]) => {
2247
// At most the transcript will have two columns - we account for that here.
23-
let allText: { col0: string[][]; col1: string[][] } = { col0: [], col1: [] };
48+
let allText: ParsedText = { col0: {}, col1: {} };
2449

2550
// Find x value for when second column begins using convenient lines.
2651
let maxCol = items.reduce(function (acc, el) {
@@ -41,7 +66,7 @@ export const parseItems = (items: LineItem[]) => {
4166

4267
// Ignore potential high school program transcript
4368
if (items[i].str === "Level:High School") {
44-
allText[currentCol] = [];
69+
allText[currentCol] = {};
4570
break;
4671
}
4772
if (pos in allText[currentCol])

0 commit comments

Comments
 (0)