what is going on

This commit is contained in:
Frank Delaguila
2022-10-04 20:46:30 -06:00
commit 001e362d38
14 changed files with 8399 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
# Installed dependencies (this pattern also covers node_modules/puppeteer/)
node_modules/
# macOS Finder metadata
.DS_Store

84
db.js Normal file
View File

@@ -0,0 +1,84 @@
// Scraped Utah football data, exposed to the page as the global `dbData`
// (index.html loads db.js before scripts.js, which reads it).
// scraper.js generates this file by writing `const dbData = <JSON>` to db.js,
// so manual edits are overwritten the next time the scraper runs.
//   "2021" / "2022": passCompletions and passAttempts read from that season's stats table.
//   "games": one entry per schedule row; scraper.js stores 0 for a score whose
//            cell on the schedule page was empty when it was scraped.
const dbData = {
"2021": {
"passCompletions": 18.1,
"passAttempts": 28.6
},
"2022": {
"passCompletions": 20.2,
"passAttempts": 29
},
"games": [
{
"opponent": "florida",
"date": "Sep 3, 2022",
"utah_score": 26,
"opponent_points": 29
},
{
"opponent": "southernutah",
"date": "Sep 10, 2022",
"utah_score": 73,
"opponent_points": 7
},
{
"opponent": "sandiegostate",
"date": "Sep 17, 2022",
"utah_score": 35,
"opponent_points": 7
},
{
"opponent": "arizonastate",
"date": "Sep 24, 2022",
"utah_score": 34,
"opponent_points": 13
},
{
"opponent": "oregonstate",
"date": "Oct 1, 2022",
"utah_score": 42,
"opponent_points": 16
},
{
"opponent": "ucla",
"date": "Oct 8, 2022",
"utah_score": 0,
"opponent_points": 0
},
{
"opponent": "usc",
"date": "Oct 15, 2022",
"utah_score": 0,
"opponent_points": 0
},
{
"opponent": "washingtonstate",
"date": "Oct 27, 2022",
"utah_score": 0,
"opponent_points": 0
},
{
"opponent": "arizona",
"date": "Nov 5, 2022",
"utah_score": 0,
"opponent_points": 0
},
{
"opponent": "stanford",
"date": "Nov 12, 2022",
"utah_score": 0,
"opponent_points": 0
},
{
"opponent": "oregon",
"date": "Nov 19, 2022",
"utah_score": 0,
"opponent_points": 0
},
{
"opponent": "colorado",
"date": "Nov 26, 2022",
"utah_score": 0,
"opponent_points": 0
}
]
}

88
db.json Normal file
View File

@@ -0,0 +1,88 @@
{
"games": [
{
"florida": {
"date": "Sep 3, 2022",
"utah_score": 26,
"opponent_points": 29
}
},
{
"southernutah": {
"date": "Sep 10, 2022",
"utah_score": 73,
"opponent_points": 7
}
},
{
"sandiegostate": {
"date": "Sep 17, 2022",
"utah_score": 35,
"opponent_points": 7
}
},
{
"arizonastate": {
"date": "Sep 24, 2022",
"utah_score": 34,
"opponent_points": 13
}
},
{
"oregonstate": {
"date": "Oct 1, 2022",
"utah_score": 42,
"opponent_points": 16
}
},
{
"ucla": {
"date": "Oct 8, 2022",
"utah_score": 0,
"opponent_points": 0
}
},
{
"usc": {
"date": "Oct 15, 2022",
"utah_score": 0,
"opponent_points": 0
}
},
{
"washingtonstate": {
"date": "Oct 27, 2022",
"utah_score": 0,
"opponent_points": 0
}
},
{
"arizona": {
"date": "Nov 5, 2022",
"utah_score": 0,
"opponent_points": 0
}
},
{
"stanford": {
"date": "Nov 12, 2022",
"utah_score": 0,
"opponent_points": 0
}
},
{
"oregon": {
"date": "Nov 19, 2022",
"utah_score": 0,
"opponent_points": 0
}
},
{
"colorado": {
"date": "Nov 26, 2022",
"utah_score": 0,
"opponent_points": 0
}
}
]
}

BIN
hero-background.jpeg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 MiB

71
index.html Normal file
View File

@@ -0,0 +1,71 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Utes Stats</title>
    <!-- <link> is a void element, so it takes no closing tag. -->
    <link rel="stylesheet" href="./styles.css">
    <script src="https://cdn.tailwindcss.com"></script>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
<body>
    <main>
        <section class="bg-center bg-scroll bg-cover flex hero h-96 mx-auto items-center justify-end overflow-hidden relative" style="background-image: url(./hero-background.jpeg)">
            <div class="relative z-50 mr-40">
                <h1 class="text-4xl font-black text-white">Utes Stats</h1>
                <p class="text-white font-medium">I have no idea what I'm doing!</p>
            </div>
        </section>
        <div class="container mx-auto px-4">
            <!-- The two canvases below are filled in by scripts.js using Chart.js. -->
            <section class="py-12" aria-label="Team Statistics">
                <h2 class="text-2xl font-black text-center mb-12 section-header">2021-2022 Stat Comparisons</h2>
                <div class="flex justify-around">
                    <div class="stat-container">
                        <h2 class="font-black">Pass Completions</h2>
                        <canvas id="passCompletions"></canvas>
                    </div>
                    <div class="stat-container">
                        <h2 class="font-black">Pass Attempts</h2>
                        <canvas id="passAttempts"></canvas>
                    </div>
                </div>
            </section>
            <!-- Hard-coded scoreboard for the Sep 3, 2022 game; the values mirror the
                 first entry of dbData.games in db.js (Utah 26, Florida 29). -->
            <section class="py-12" aria-label="Team Game Schedule">
                <h2 class="text-2xl font-black text-center mb-12 section-header">Game Schedule</h2>
                <div class="items-center flex justify-evenly">
                    <div class="text-center">
                        <h2 id="utahScore" class="text-4xl font-black text-red">26</h2>
                        <!-- ids must be unique per document, so only the opposing team keeps id="opponent". -->
                        <h3 id="utahTeam" class="text-xl uppercase font-black">Utah</h3>
                    </div>
                    <div class="text-center">
                        <h2 class="text-4xl uppercase font-black game-versus">VS</h2>
                        <h3 class="text-2xl uppercase font-black">Sep 3, 2022</h3>
                    </div>
                    <div class="text-center">
                        <h2 id="opponentScore" class="text-4xl font-black text-red">29</h2>
                        <h3 id="opponent" class="text-xl uppercase font-black">Florida</h3>
                    </div>
                </div>
            </section>
        </div>
    </main>
    <footer class="text-center py-12 text-white">
        <p>* All this data is Web Scraped from <a class="text-red font-medium" href="https://www.sports-reference.com">Sports Reference</a></p>
    </footer>
    <!-- db.js defines the global dbData and must load before scripts.js, which reads it. -->
    <script src="./db.js"></script>
    <script src="./scripts.js"></script>
</body>
</html>

6
jest-puppeteer.config.js Normal file
View File

@@ -0,0 +1,6 @@
// Browser launch options exported for jest-puppeteer.
const launch = {
    // Run the test browser without a visible window.
    headless: true,
    // Puppeteer's slowMo launch option: slows each operation by this many milliseconds.
    slowMo: 30
};

module.exports = { launch };

7
jest.config.js Normal file
View File

@@ -0,0 +1,7 @@
const config = {
verbose: true,
preset: 'jest-puppeteer',
setupFilesAfterEnv: ["expect-puppeteer"]
};
module.exports = config;

7868
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

21
package.json Normal file
View File

@@ -0,0 +1,21 @@
{
"name": "web-scraper",
"version": "1.0.0",
"description": "",
"main": "scripts.js",
"scripts": {
"test": "jest"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"expect-puppeteer": "^6.1.1",
"jest": "^29.1.2",
"jest-puppeteer": "^6.1.1",
"puppeteer": "^18.0.5",
"puppeteer-extra": "^3.3.4",
"puppeteer-extra-plugin-adblocker": "^2.13.5",
"puppeteer-extra-plugin-stealth": "^2.11.1"
}
}

136
scraper.js Normal file
View File

@@ -0,0 +1,136 @@
const puppeteer = require('puppeteer-extra');
const { writeFile } = require('fs').promises;
/*************************************
* THIS IS WHAT MAKES IT WORK
* SHOUT OUT JORDAN HANSEN - https://cobaltintelligence.com/blog/avoid-being-blocked-with-puppeteer/
* ALSO ANOTHER WAY TO BLOCK ALL ADS IS TO INTERCEPT ALL REQUESTS, AND FILTER ALL REQUESTS FOR THESE DOMAINS:
* https://winhelp2002.mvps.org/hosts.txt
* Example Code:
* //now we read the host file
var hostFile = fs.readFileSync('hosts.txt', 'utf8').split('\n');
var hosts = {};
for (var i = 0; i < hostFile.length; i++) {
var frags = hostFile[i].split(' ');
if (frags.length > 1 && frags[0] === '0.0.0.0') {
hosts[frags[1].trim()] = true;
}
}
* page.on('request', request => {
var domain = null;
if (task.input.blockads) {
var frags = request.url().split('/');
if (frags.length > 2) {
domain = frags[2];
}
}
if ((task.input.blockads && hosts[domain] === true) || (!task.input.includephotos && request.resourceType() === 'image')) {
request.abort();
}
else {
request.continue();
}
});
*************************************/
// const StealthPlugin = require('puppeteer-extra-plugin-stealth');
// puppeteer.use(StealthPlugin());
const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker');
/************************************/
/**
 * Reads the passing numbers from the first row (data-row="0") of the
 * `.stats_table` on a season page that is already loaded in `page`.
 *
 * @param {object} page - Puppeteer page that has navigated to a season page.
 * @returns {Promise<{passCompletions: number, passAttempts: number}>}
 *   Rejects with a descriptive error when either stat cell is not found.
 */
function scrapePassingStats( page ) {
    return page.evaluate( () => {
        // This callback runs inside the browser page, so it can only use what is defined here.
        const readStat = ( stat ) => {
            const cell = document.querySelector( `.stats_table tr[data-row="0"] td[data-stat="${stat}"]` );
            if ( cell === null ) {
                throw new Error( `Could not find the "${stat}" cell in the stats table` );
            }
            return Number( cell.textContent );
        };
        return {
            passCompletions: readStat( 'pass_cmp' ),
            passAttempts: readStat( 'pass_att' )
        };
    } );
}

/**
 * Reads every game from the `#schedule` table of the schedule page loaded in `page`.
 *
 * Each entry looks like:
 *   { opponent: 'florida', date: 'Sep 3, 2022', utah_score: 26, opponent_points: 29 }
 * A score whose cell is empty is stored as 0.
 *
 * @param {object} page - Puppeteer page that has navigated to the schedule page.
 * @returns {Promise<Array<{opponent: string, date: string, utah_score: number, opponent_points: number}>>}
 *   Rejects when a row is missing its opponent or score cell.
 */
function scrapeSchedule( page ) {
    return page.evaluate( () => {
        // This callback runs inside the browser page, so it can only use what is defined here.
        const cellsFor = ( stat ) => document.querySelectorAll( `#schedule td[data-stat="${stat}"]` );
        const dates = cellsFor( 'date_game' );
        const opponents = cellsFor( 'opp_name' );
        const utahScores = cellsFor( 'points' );
        const opponentScores = cellsFor( 'opp_points' );

        // The four lists are matched up by index, so every date needs a partner in each list.
        const cellAt = ( cells, index, stat ) => {
            const cell = cells[index];
            if ( cell === undefined ) {
                throw new Error( `Schedule row ${index} has no "${stat}" cell` );
            }
            return cell;
        };
        const toScore = ( cell ) => ( cell.textContent !== '' ? Number( cell.textContent ) : 0 );

        return Array.from( dates, ( date, index ) => ( {
            // Strip parentheses, brackets, digits, carets and whitespace, then lowercase,
            // to turn the opponent cell text into a compact key.
            opponent: cellAt( opponents, index, 'opp_name' ).textContent.replace( /([([)0-9^\s])/g, '' ).toLowerCase().trim(),
            date: date.textContent,
            utah_score: toScore( cellAt( utahScores, index, 'points' ) ),
            opponent_points: toScore( cellAt( opponentScores, index, 'opp_points' ) )
        } ) );
    } );
}

// Launch the browser, scrape the three Sports Reference pages, and write db.js.
puppeteer
    .use( AdblockerPlugin( { blockTrackers: true } ) )
    .launch( { headless: false } )
    .then( async browser => {
        // try/finally guarantees the browser is closed even when a scrape step fails.
        try {
            const schedulePage = await browser.newPage();
            await schedulePage.goto( 'https://www.sports-reference.com/cfb/schools/utah/2022-schedule.html' );
            const previousYearStatisticsPage = await browser.newPage();
            await previousYearStatisticsPage.goto( 'https://www.sports-reference.com/cfb/schools/utah/2021.html' );
            const currentYearStatisticsPage = await browser.newPage();
            await currentYearStatisticsPage.goto( 'https://www.sports-reference.com/cfb/schools/utah/2022.html' );

            // Build the data for each page.
            const previousYearStatistics = await scrapePassingStats( previousYearStatisticsPage );
            const currentYearStatistics = await scrapePassingStats( currentYearStatisticsPage );
            const games = await scrapeSchedule( schedulePage );

            const results = {
                games,
                2021: previousYearStatistics,
                2022: currentYearStatistics
            };

            // The output is a .js file (a `const dbData = ...` assignment) rather than
            // plain JSON so index.html can load it locally with a <script> tag for the charts.
            // Serving the JSON from an Express endpoint would be an alternative.
            const data = JSON.stringify( results, null, 2 );
            console.log( data );
            // Awaited so a failed write reaches the .catch() below instead of being lost.
            await writeFile( 'db.js', `const dbData = ${data}`, 'utf8' );
        } finally {
            await browser.close();
        }
    } )
    .catch( (err) => {
        console.error( err );
        // Signal the failure to the shell / CI instead of exiting with status 0.
        process.exitCode = 1;
    } );

12
scraper.test.js Normal file
View File

@@ -0,0 +1,12 @@
require('expect-puppeteer');
// Smoke test: loads the Sports Reference schedule page that scraper.js reads
// and checks its title. `page` is not defined in this file; it is expected to be
// the global provided by the jest-puppeteer preset configured in jest.config.js.
describe( 'Utah 2022 schedule page', () => {
    beforeAll( async () => {
        await page.goto('https://www.sports-reference.com/cfb/schools/utah/2022-schedule.html', { waitUntil: 'domcontentloaded' });
    } );
    it( 'should load the schedule page and match its title.', async () => {
        const title = await page.title();
        // toMatch with a string argument passes when the title contains that string.
        expect(title).toMatch("2022 Utah Utes Schedule and Results | College Football at Sports-Reference.com");
    } );
} );

67
scripts.js Normal file
View File

@@ -0,0 +1,67 @@
/**
 * Draws a two-bar Chart.js chart comparing one stat between the 2021 and 2022 seasons.
 *
 * @param {string} canvasId - id of the <canvas> element to draw on.
 * @param {string} statKey - property read from dbData['2021'] and dbData['2022'].
 * @param {string} statLabel - text placed after the year in each bar label.
 * @returns {Chart} the created chart instance.
 */
function createSeasonComparisonChart( canvasId, statKey, statLabel ) {
    const seasons = ['2021', '2022'];
    const barColor = '#c00';

    return new Chart( document.getElementById( canvasId ), {
        type: 'bar',
        data: {
            labels: seasons.map( ( season ) => `${season} ${statLabel}` ),
            datasets: [{
                data: seasons.map( ( season ) => dbData[season][statKey] ),
                backgroundColor: seasons.map( () => barColor ),
                borderColor: seasons.map( () => barColor ),
                borderWidth: 1
            }]
        },
        options: {
            plugins: {
                // A single unnamed dataset needs no legend.
                legend: {
                    display: false
                }
            },
            scales: {
                y: {
                    beginAtZero: true
                }
            }
        }
    } );
}

// Debug output: the full data set loaded from db.js.
console.log( dbData );

const passCompletions = createSeasonComparisonChart( 'passCompletions', 'passCompletions', 'Pass Completions' );
const passAttempts = createSeasonComparisonChart( 'passAttempts', 'passAttempts', 'Pass Attempts' );

// Debug output: each scheduled game.
dbData.games.forEach( game => {
    console.log( game );
} );

37
styles.css Normal file
View File

@@ -0,0 +1,37 @@
/* Red skewed overlay blended over the hero background image. */
.hero::before {
    content: '';
    display: block;
    position: absolute;
    width: 100%;
    height: 100%;
    background-color: #c00;
    mix-blend-mode: multiply;
    transform: translateX(50%) skew(-40deg);
}

/* Red text: score figures, the footer link and the "VS" label. */
.text-red,
.game-versus {
    color: #c00;
}

/* Bordered card around each chart. */
.stat-container {
    padding: 1rem;
    border: 1px solid #efefef;
    border-radius: 0.25rem;
}

/* Short red rule centered under each section heading. */
.section-header::after {
    content: '';
    display: block;
    width: 4rem;
    height: 1px;
    margin: 1rem auto;
    background-color: #c00;
}

footer {
    background-color: #18191B;
}