Working with PDFs can be annoying.
But we are going to make it easy. First, you need to:
Make a folder in Google Drive
Drop a PDF in the folder
Open the file's sharing settings and copy the URL; it should have /file/d/ in the string
In Google Apps Script, make a new script, and add these services:
The Apps Script Primer is here if you have no idea what that meant.
The process works like this:
Insert the url
Script converts the data to a Google Doc
The Google Doc is read into a container; the script allows you to set a word limit (the default is 2000)
Data is accessible
Paid subscribers get the next level, sending the data to OpenAI API
The PDF Conversion Code
/**
 * Converts a Drive-hosted PDF into a Google Doc, reads a word-limited copy of
 * its text, logs it, and returns it to the caller.
 *
 * @param {string} [aiRequest] - Accepted so existing callers keep working; this
 *     function does not use it.
 * @param {string} [inputUrl] - Drive sharing URL of the PDF. A placeholder URL
 *     is used when this is falsy.
 * @param {number} [wordLimit] - Maximum number of words to keep. Falls back to
 *     2000 when omitted or not a positive number.
 * @returns {?Object} Whatever storeLimitedGoogleDocContents returns, or null
 *     when any step threw (the error message is logged).
 */
function convertPdfAndStoreLimitedContents(aiRequest, inputUrl, wordLimit) {
  // Default PDF URL if none is provided
  var pdfUrl = inputUrl || 'https://drive.google.com/file/d/YOUR ID GOES HERE/view?usp=drive_link';
  var limit = (typeof wordLimit === 'number' && wordLimit > 0) ? wordLimit : 2000;

  try {
    // Extract the file ID from the URL
    var pdfFileId = extractFileIdFromUrl(pdfUrl);
    // Convert the PDF to a Google Doc
    var googleDocId = convertPdfToGoogleDoc(pdfFileId);
    // Store the limited contents of the Google Doc
    var documentContents = storeLimitedGoogleDocContents(googleDocId, limit);

    Logger.log('Converted Document Contents:');
    Logger.log(documentContents);

    // Hand the contents back so callers can actually use them, not just read the log.
    return documentContents;
  } catch (error) {
    // Best effort: report the failure in the log and signal it with null.
    Logger.log('Error: ' + error.message);
    return null;
  }
}
/**
 * Pulls the Drive file ID out of a sharing URL.
 *
 * Recognizes the ".../d/<id>" path form, whether or not anything follows the
 * ID ("/view", a query string, a fragment), and the "?id=<id>" query form.
 *
 * @param {string} url - The full Drive URL.
 * @returns {string} The file ID found in the URL.
 * @throws {Error} When url is not a string or no file ID can be found in it.
 */
function extractFileIdFromUrl(url) {
  var invalidUrlMessage = 'Invalid URL. Unable to extract file ID.';

  if (typeof url !== 'string') {
    throw new Error(invalidUrlMessage);
  }

  // The ID ends at the next "/", "?" or "#", or at the end of the string, so a
  // link without a trailing "/view" segment is still accepted.
  var match = url.match(/\/d\/([^\/?#]+)/) || url.match(/[?&]id=([^&#]+)/);
  if (!match) {
    throw new Error(invalidUrlMessage);
  }
  return match[1];
}
/**
 * Creates a Google Docs copy of a PDF stored in Drive and files the copy in
 * the PDF's first parent folder.
 *
 * The copy is named after the PDF with a trailing ".pdf" removed and
 * " (Converted)" appended. Its name and URL are written to the log.
 *
 * NOTE(review): `title` is the Drive API v2 field name; this presumably relies
 * on the Drive advanced service being enabled as v2 — confirm.
 *
 * @param {string} pdfFileId - Drive file ID of the PDF.
 * @returns {string} The ID of the newly created Google Doc.
 */
function convertPdfToGoogleDoc(pdfFileId) {
  // Get the PDF file
  var pdfFile = DriveApp.getFileById(pdfFileId);

  // The parent iterator can be empty; only call next() when a folder exists,
  // otherwise the copy is left wherever Drive created it.
  var parents = pdfFile.getParents();
  var parentFolder = parents.hasNext() ? parents.next() : null;

  // Strip only a trailing ".pdf" (any letter case), not a ".pdf" that happens
  // to appear earlier in the name.
  var baseName = pdfFile.getName().replace(/\.pdf$/i, '');

  // Create a new Google Doc from the PDF
  var convertedFile = Drive.Files.copy(
    {
      title: baseName + ' (Converted)',
      mimeType: MimeType.GOOGLE_DOCS
    },
    pdfFileId
  );

  var googleDocFile = DriveApp.getFileById(convertedFile.id);
  if (parentFolder) {
    // moveTo relocates the file in one step, regardless of where the copy
    // was created, replacing the add-to-folder / remove-from-root pair.
    googleDocFile.moveTo(parentFolder);
  }

  // Log the new file's details
  Logger.log('Converted File Name: ' + googleDocFile.getName());
  Logger.log('Converted File URL: ' + googleDocFile.getUrl());

  return googleDocFile.getId();
}
/**
 * Reads a Google Doc's body text and returns at most `wordLimit` of its words.
 *
 * Words are runs of non-whitespace characters; the kept words are re-joined
 * with single spaces.
 *
 * @param {string} googleDocId - ID of the Google Doc to read.
 * @param {number} wordLimit - Maximum number of words to keep.
 * @returns {{fullText: string, wordCount: number}} The kept text and the
 *     number of words it contains.
 */
function storeLimitedGoogleDocContents(googleDocId, wordLimit) {
  // Access the Google Document and get the body text
  var bodyText = DocumentApp.openById(googleDocId).getBody().getText();

  // Trim first: splitting text with leading or trailing whitespace would
  // otherwise yield empty strings that get counted as words (an empty document
  // would report one word).
  var trimmedText = bodyText.trim();
  var words = trimmedText === '' ? [] : trimmedText.split(/\s+/);

  // Truncate the text to the word limit
  if (words.length > wordLimit) {
    words = words.slice(0, wordLimit);
  }

  // Store text in a container
  return {
    fullText: words.join(' '),
    wordCount: words.length
  };
}
The Output should look like this:
If you are a paid subscriber, there is an additional script below that incorporates the AI function, along with the AI function itself.
Consider signing up to fuel my dedication to serverless programming.