% Batch driver for 'htmlTableToCell.m': converts every Orbit patent data
% file (saved as an html table) in the current folder into a .MAT file.
%
% Expected input file name (the folder name is the search name):
%   '<folder> patents (Orbit search results <first> - <last>).html'
% Output file name, written to the current folder:
%   '<folder> patent data <first> - <last> <dd-mmm-yyyy>.mat'
% Each .MAT file holds a single variable named 'cell_array'.
%
% Html files whose names do not follow the expected pattern are skipped
% with a warning instead of aborting the whole batch.

% Start from a clean workspace. 'clearvars' is sufficient here; 'clear all'
% would additionally flush compiled functions and breakpoints for no gain.
clearvars
close all
tic()

% Working directory. To let the user pick a folder instead, 'uigetdir' can
% be used, but note that the 'dir' and 'save' calls below operate on the
% current directory, so the selected folder would also have to be made
% current (and a cancelled dialog, which returns 0, handled).
% filepath = uigetdir;
filepath = pwd;

% The name of the deepest folder is used to recognise matching html files:
[upperPath,deepestFolder] = fileparts(filepath);

% Candidate html files in the current folder:
files = dir('*.html');

% Use the first column header to identify the start of the table (the
% variable is deliberately not called 'table', which would shadow the
% built-in MATLAB type of that name):
tableSpec.idTableBy.plaintextInFirstTD = 'Family Accession Nbr';

% File-name pattern. The folder name is escaped so that characters such as
% '.', '(' or '+' in it are matched literally; '(\d+)' captures the first
% and last record numbers; the trailing '\.html$' pins the extension.
namePattern = [regexptranslate('escape',deepestFolder), ...
    ' patents \(Orbit search results (\d+) - (\d+)\)\.html$'];

% Iterate through the html files in the current folder:
for k = 1:numel(files)
    % Select the next html file to convert:
    name = files(k).name;

    % Extract the record numbers covered by this file. With 'once' the
    % result is a 1x2 cell of strings, or empty when the name does not
    % match the expected pattern.
    batch_limits = regexp(name,namePattern,'tokens','once');
    if isempty(batch_limits)
        warning('htmlBatch:unexpectedFileName', ...
            'Skipping "%s": file name does not match the expected pattern.',name);
        continue
    end
    firstRecord = batch_limits{1};
    lastRecord = batch_limits{2};

    % Descriptive variable name for this batch. NOTE: not used further in
    % this script (the data is always saved as 'cell_array'); retained so
    % the workspace contents are unchanged.
    cell_array_name = [strrep(lower(deepestFolder),' ','_'),'_patents_',firstRecord,'_',lastRecord];

    % Convert the html table of the current file:
    cell_array = htmlTableToCell(name,tableSpec);

    % Save the converted table to a date-stamped .MAT file:
    savefile = [deepestFolder,' patent data ',firstRecord,' - ',lastRecord,' ',date,'.mat'];
    save(savefile,'cell_array');

    % Report the cumulative elapsed time after each file:
    toc()
end
toc()