time | Calls | line |
---|
| | 164 | function b = categorical(inputData,varargin)
|
| | 165 | %CATEGORICAL Create a categorical array.
|
| | 166 | % C = CATEGORICAL(DATA) creates a categorical array from DATA. DATA is a
|
| | 167 | % numeric, logical, categorical array, string array, or a cell array of
|
| | 168 | % character vectors. CATEGORICAL creates categories in C from the sorted
|
| | 169 | % unique values in DATA.
|
| | 170 | %
|
| | 171 | % C = CATEGORICAL(DATA,VALUESET) creates a categorical array from DATA,
|
| | 172 | % with one category for each value in VALUESET. VALUESET is a vector
|
| | 173 | % containing unique values that can be compared to those in DATA using
|
| | 174 | % the equality operator. VALUESET often contains values not present in
|
| | 175 | % DATA. If DATA contains any values not present in VALUESET, the
|
| | 176 | % corresponding elements of C are undefined.
|
| | 177 | %
|
| | 178 | % C = CATEGORICAL(DATA,VALUESET,CATEGORYNAMES) creates a categorical
|
| | 179 | % array from DATA, naming the categories in C using CATEGORYNAMES.
|
| | 180 | % CATEGORYNAMES is a string array or a cell array of character vectors.
|
| | 181 | % CATEGORICAL assigns the names to C's categories in order according to
|
| | 182 | % the values in VALUESET.
|
| | 183 | %
|
| | 184 | % To merge multiple distinct values in DATA into a single category in C,
|
| | 185 | % provide duplicate names corresponding to those values.
|
| | 186 | %
|
| | 187 | % C = CATEGORICAL(DATA, ..., 'Ordinal',ORD) specifies whether C is ordinal,
|
| | 188 | % that is, if its categories have a mathematical ordering. If ORD is true,
|
| | 189 | % the values in C can be compared with the complete set of relational
|
| | 190 | % comparisons. If ORD is false (the default), the values in C can only be
|
| | 191 | % compared for equality. Discrete non-numeric data that are not ordinal are
|
| | 192 | % often referred to as "nominal" data.
|
| | 193 | %
|
| | 194 | % C = CATEGORICAL(DATA, ..., 'Protected',PROTECT) specifies whether or
|
| | 195 | % not C's categories are protected. If PROTECT is false (the default),
|
| | 196 | % new categories in C can be created automatically by assigning to C, and
|
| | 197 | % C (if it is not ordinal) can be combined with arrays that have
|
| | 198 | % different categories. If PROTECT is true, new categories in C must be
|
| | 199 | % added using the ADDCATS method, and C can not be combined with arrays
|
| | 200 | % that have different categories. Ordinal arrays are always protected.
|
| | 201 | %
|
| | 202 | % By default, an element of C is undefined if the corresponding element of
|
| | 203 | % DATA is NaN (when DATA is numeric), the empty string (when DATA
|
| | 204 | % contains strings), an empty character vector (when DATA contains a cell
|
| | 205 | % array of character vectors) or undefined (when DATA is categorical).
|
| | 206 | % CATEGORICAL treats such elements as "undefined" or "missing" and C does
|
| | 207 | % not include a category that they belong to. To create an explicit
|
| | 208 | % category for those elements instead of treating them as undefined, you
|
| | 209 | % must include NaN, the empty string, or an undefined element in
|
| | 210 | % VALUESET.
|
| | 211 | %
|
| | 212 | % Examples:
|
| | 213 | % % Create a categorical array from character vectors
|
| | 214 | % colors1 = categorical({'r' 'b' 'g'; 'g' 'r' 'b'; 'b' 'r' 'g'})
|
| | 215 | %
|
| | 216 | % colors2 = categorical({'r' 'b' 'g'; 'g' 'r' 'b'; 'b' 'r' 'g'}, ...
|
| | 217 | % {'r' 'g' 'b' 'p'},{'red' 'green' 'blue' 'purple'})
|
| | 218 | %
|
| | 219 | % % Create a categorical array from integer values
|
| | 220 | % sizes = categorical(randi([1 3],5,2),1:3,{'child' 'adult' 'senior'},'Ordinal',true)
|
| | 221 | %
|
| | 222 | % % Create a categorical array by binning continuous data
|
| | 223 | % x = rand(100,1);
|
| | 224 | % y = discretize(x,[0 .25 .75 1],'categorical',{'small', 'medium', 'large'});
|
| | 225 | % histogram(y)
|
| | 226 | %
|
| | 227 | % See also NOMINAL, ORDINAL, DISCRETIZE.
|
| | 228 |
|
| | 229 | import matlab.internal.datatypes.validateLogical
|
| | 230 |
|
< 0.001 | 3 | 231 | if nargin == 0
|
| | 232 | % Nothing to do
|
| | 233 | return
|
< 0.001 | 3 | 234 | end
|
| | 235 |
|
| | 236 | % Pull out optional positional inputs, which cannot be char
|
< 0.001 | 3 | 237 | if (nargin == 1) || isNVpair(varargin{1})
|
| | 238 | % categorical(inputData) or categorical(inputData,Name,Value,...)
|
< 0.001 | 2 | 239 | suppliedValueSet = false;
|
< 0.001 | 2 | 240 | suppliedCategoryNames = false;
|
< 0.001 | 1 | 241 | elseif (nargin == 2) || isNVpair(varargin{2})
|
| | 242 | % categorical(inputData,valueSet) or categorical(inputData,valueSet,Name,Value,...)
|
< 0.001 | 1 | 243 | suppliedValueSet = true;
|
< 0.001 | 1 | 244 | valueSet = varargin{1};
|
< 0.001 | 1 | 245 | suppliedCategoryNames = false;
|
< 0.001 | 1 | 246 | varargin = varargin(2:end);
|
| | 247 | else
|
| | 248 | % categorical(inputData,valueSet,categoryNames) or categorical(inputData,valueSet,categoryNames,Name,Value,...)
|
| | 249 | suppliedValueSet = true;
|
| | 250 | valueSet = varargin{1};
|
| | 251 | suppliedCategoryNames = true;
|
| | 252 | categoryNames = varargin{2};
|
| | 253 | varargin = varargin(3:end);
|
< 0.001 | 3 | 254 | end
|
| | 255 |
|
< 0.001 | 3 | 256 | pnames = {'Ordinal' 'Protected'};
|
< 0.001 | 3 | 257 | dflts = { false false };
|
0.003 | 3 | 258 | [isOrdinal,isProtected,supplied] = ...
|
| 3 | 259 | matlab.internal.datatypes.parseArgs(pnames, dflts, varargin{:}); %#ok<*PROP>
|
< 0.001 | 3 | 260 | isOrdinal = validateLogical(isOrdinal,'Ordinal');
|
< 0.001 | 3 | 261 | isProtected = validateLogical(isProtected,'Protected');
|
< 0.001 | 3 | 262 | if isOrdinal
|
| | 263 | if supplied.Protected
|
| | 264 | if ~isProtected
|
| | 265 | error(message('MATLAB:categorical:UnprotectedOrdinal'));
|
| | 266 | end
|
| | 267 | else
|
| | 268 | isProtected = true;
|
| | 269 | end
|
< 0.001 | 3 | 270 | end
|
< 0.001 | 3 | 271 | b.isOrdinal = isOrdinal;
|
< 0.001 | 3 | 272 | b.isProtected = isProtected;
|
| | 273 |
|
< 0.001 | 3 | 274 | if isa(inputData, 'missing')
|
| | 275 | inputData = string(inputData);
|
< 0.001 | 3 | 276 | end
|
| | 277 |
|
< 0.001 | 3 | 278 | iscellstrInput = iscellstr(inputData);
|
< 0.001 | 3 | 279 | isstringInput = isstring(inputData);
|
| | 280 |
|
| | 281 | % Catch some inputs that are specifically disallowed.
|
< 0.001 | 3 | 282 | if ischar(inputData)
|
| | 283 | error(message('MATLAB:categorical:CharData'));
|
< 0.001 | 3 | 284 | elseif matlab.internal.datatypes.istabular(inputData)
|
| | 285 | error(message('MATLAB:categorical:TableData'));
|
< 0.001 | 3 | 286 | end
|
| | 287 | % Remove leading and trailing whitespace from cellstrs or strings
|
< 0.001 | 3 | 288 | if iscellstrInput || isstringInput
|
< 0.001 | 3 | 289 | inputData = strtrim(inputData);
|
< 0.001 | 3 | 290 | end
|
| | 291 |
|
| | 292 | % Value set given explicitly, do not reorder its values
|
< 0.001 | 3 | 293 | if suppliedValueSet
|
| | 294 | % The value set can never be char, because a char argument is recognized as a param name
|
< 0.001 | 1 | 295 | iscellstrValueSet = iscellstr(valueSet); %#ok<ISCLSTR>
|
< 0.001 | 1 | 296 | isstringValueSet = isstring(valueSet);
|
| | 297 |
|
| | 298 | % Allow mixed input of string data and cellstr valueSet for
|
| | 299 | % convenience.
|
| | 300 | % (e.g. categorical(string({'a' 'b' ...}),{'a' 'b' 'c'}))
|
| | 301 | % Convert the valueSet to a string array so that it can proceed
|
| | 302 | % through the string code path
|
< 0.001 | 1 | 303 | if isstringInput && iscellstrValueSet
|
< 0.001 | 1 | 304 | valueSet = string(valueSet);
|
| | 305 | % reset flags after converting valueSet to string
|
| 1 | 306 | iscellstrValueSet = false;
|
< 0.001 | 1 | 307 | isstringValueSet = true;
|
| 1 | 308 | end
|
| | 309 |
|
< 0.001 | 1 | 310 | if iscellstrValueSet || isstringValueSet
|
< 0.001 | 1 | 311 | valueSet = strtrim(valueSet(:));
|
| | 312 | % unique will remove duplicate empty character vectors or strings
|
| | 313 | elseif isa(valueSet,'categorical')
|
| | 314 | % If inputData is categorical and valueSet is ordinal, their category names must match,
|
| | 315 | % although the elements of valueSet might be a subset or reordering of those categories.
|
| | 316 | if isa(inputData,'categorical') && valueSet.isOrdinal
|
| | 317 | if ~isequal(inputData.categoryNames,valueSet.categoryNames)
|
| | 318 | error(message('MATLAB:categorical:ValuesetOrdinalCategoriesMismatch'));
|
| | 319 | end
|
| | 320 | end
|
| | 321 | valueSet.codes = valueSet.codes(:);
|
| | 322 | else
|
| | 323 | valueSet = valueSet(:);
|
< 0.001 | 1 | 324 | end
|
| | 325 |
|
| | 326 | % Catch multiple missing values in the valueSet, since unique treats them as
|
| | 327 | % distinct.
|
| 1 | 328 | try
|
< 0.001 | 1 | 329 | nmissing = sum(ismissing(valueSet));
|
| | 330 | catch % in case the valueset is made up of objects for which ismissing is not defined.
|
| | 331 | nmissing = 0;
|
< 0.001 | 1 | 332 | end
|
| 1 | 333 | if nmissing > 1
|
| | 334 | error(message('MATLAB:categorical:MultipleMissingInValueset'));
|
< 0.001 | 1 | 335 | end
|
| | 336 |
|
| 1 | 337 | try
|
0.002 | 1 | 338 | uvalueSet = unique(valueSet);
|
| | 339 | catch ME
|
| | 340 | throw(addCause(MException(message('MATLAB:categorical:UniqueMethodFailedValueset')),ME));
|
< 0.001 | 1 | 341 | end
|
< 0.001 | 1 | 342 | if length(uvalueSet) < length(valueSet)
|
| | 343 | error(message('MATLAB:categorical:DuplicatedValues'));
|
< 0.001 | 1 | 344 | end
|
| | 345 |
|
| | 346 | % Infer categories from categorical data's categories
|
< 0.001 | 2 | 347 | elseif isa(inputData,'categorical')
|
| | 348 | valueSet = categories(inputData);
|
| | 349 | icats = double(inputData.codes);
|
| | 350 | iscellstrValueSet = true;
|
| | 351 |
|
| | 352 | % Infer categories from the data, they are first sorted
|
< 0.001 | 2 | 353 | else % ~suppliedValueSet
|
| | 354 | % Numeric, logical, cellstr, or anything else that has a unique
|
| | 355 | % method, except char (already weeded out). Cellstr has already had
|
| | 356 | % leading/trailing spaces removed. Save the index vector for later.
|
< 0.001 | 2 | 357 | try
|
0.003 | 2 | 358 | [valueSet,~,icats] = unique(inputData(:));
|
| | 359 | catch ME
|
| | 360 | throw(addCause(MException(message('MATLAB:categorical:UniqueMethodFailedData')),ME));
|
< 0.001 | 2 | 361 | end
|
| | 362 |
|
| | 363 | % '' or NaN or <undefined> all become <undefined> by default, remove
|
| | 364 | % those from the list of categories.
|
| | 365 | % can assume the ValueSet has the same type as Input, because
|
| | 366 | % it's constructed from the input in this case.
|
< 0.001 | 2 | 367 | iscellstrValueSet = iscellstrInput;
|
< 0.001 | 2 | 368 | isstringValueSet = isstringInput;
|
< 0.001 | 2 | 369 | if iscellstrValueSet
|
< 0.001 | 2 | 370 | [valueSet,icats] = removeUtil(valueSet,icats,cellfun('isempty',valueSet));
|
| | 371 | elseif isstringValueSet
|
| | 372 | [valueSet,icats] = removeUtil(valueSet,icats,valueSet=="" | ismissing(valueSet));
|
| | 373 | elseif isa(valueSet,'categorical')
|
| | 374 | % can't use categorical subscripting on valueSet, go directly to the codes
|
| | 375 | [valueSet.codes,icats] = removeUtil(valueSet.codes,icats,isundefined(valueSet));
|
| | 376 | else
|
| | 377 | [valueSet,icats] = removeUtil(valueSet,icats,ismissing(valueSet));
|
< 0.001 | 2 | 378 | end
|
| 3 | 379 | end
|
| | 380 |
|
| | 381 | % Verify the number of categories before trying to do anything else.
|
< 0.001 | 3 | 382 | if length(valueSet) > categorical.maxNumCategories
|
| | 383 | error(message('MATLAB:categorical:MaxNumCategoriesExceeded',categorical.maxNumCategories));
|
< 0.001 | 3 | 384 | end
|
| | 385 |
|
| | 386 | % valueSet is a column vector at this point
|
| | 387 |
|
| | 388 | % Category names given explicitly, do not reorder them
|
< 0.001 | 3 | 389 | mergingCategories = false;
|
< 0.001 | 3 | 390 | if suppliedCategoryNames
|
| | 391 | categoryNames = checkCategoryNames(categoryNames,0); % error if '', or '<undefined>', but allow duplicates
|
| | 392 | if length(categoryNames) ~= length(valueSet)
|
| | 393 | if suppliedValueSet
|
| | 394 | error(message('MATLAB:categorical:WrongNumCategoryNamesValueset'));
|
| | 395 | else
|
| | 396 | error(message('MATLAB:categorical:WrongNumCategoryNames'));
|
| | 397 | end
|
| | 398 | end
|
| | 399 |
|
| | 400 | % If the category names contain duplicates, those will be merged
|
| | 401 | % into identical categories. Remove the duplicate names, put the
|
| | 402 | % categories corresponding to those names at the end so they'll
|
| | 403 | % be easier to remove, and create a map from categories to the
|
| | 404 | % ultimate internal codes.
|
| | 405 | [unames,i,j] = unique(categoryNames,'first');
|
| | 406 | mergingCategories = (length(unames) < length(categoryNames));
|
| | 407 | if mergingCategories
|
| | 408 | [i,iord] = sort(i);
|
| | 409 | iordinv(iord) = 1:length(iord); j = iordinv(j);
|
| | 410 | dups = setdiff(1:length(categoryNames),i);
|
| | 411 | categoryNames = categoryNames(i(:));
|
| | 412 | ord = [i(:); dups(:)];
|
| | 413 | valueSet = valueSet(ord);
|
| | 414 | mergeConvert(2:(length(ord)+1)) = j(ord);
|
| | 415 | end
|
| | 416 |
|
| | 417 | b.categoryNames = cellstr(categoryNames);
|
| | 418 |
|
| | 419 | % Infer category names from the value set, which in turn may have been
|
| | 420 | % inferred from the input data. The value set has already been unique'd
|
| | 421 | % and turned into a column vector.
|
< 0.001 | 3 | 422 | elseif ~isempty(valueSet) % if valueSet is empty, no need to create names
|
< 0.001 | 3 | 423 | if isnumeric(valueSet)
|
| | 424 | if isfloat(valueSet) && any(valueSet ~= round(valueSet))
|
| | 425 | % Create names using 5 digits. If that fails to create
|
| | 426 | % unique names, the caller will have to provide names.
|
| | 427 | b.categoryNames = strtrim(cellstr(num2str(valueSet,'%-0.5g')));
|
| | 428 | else
|
| | 429 | % Create names that preserve all digits of integers and
|
| | 430 | % (up to 16 digits of) flints.
|
| | 431 | b.categoryNames = strtrim(cellstr(num2str(valueSet)));
|
| | 432 | end
|
| | 433 | if length(unique(b.categoryNames)) < length(b.categoryNames)
|
| | 434 | error(message('MATLAB:categorical:CantCreateCategoryNames'));
|
| | 435 | end
|
< 0.001 | 3 | 436 | elseif islogical(valueSet)
|
| | 437 | categoryNames = {'false'; 'true'};
|
| | 438 | b.categoryNames = categoryNames(valueSet+1);
|
| | 439 | % elseif ischar(valueSet)
|
| | 440 | % Char valueSet is not possible
|
< 0.001 | 3 | 441 | elseif iscellstrValueSet
|
| | 442 | % These may be specifying character values, or they may be
|
| | 443 | % specifying categorical values via their names.
|
| | 444 |
|
| | 445 | % We will not attempt to create a name for the empty char
|
| | 446 | % vectors or the undefined categorical label. Names must
|
| | 447 | % be given explicitly.
|
< 0.001 | 2 | 448 | if matches(categorical.undefLabel,valueSet) %undefLabel is scalar
|
| | 449 | error(message('MATLAB:categorical:UndefinedLabelCategoryName', categorical.undefLabel));
|
< 0.001 | 2 | 450 | elseif matches(categorical.missingLabel,valueSet) %missingLabel is scalar
|
| | 451 | error(message('MATLAB:categorical:UndefinedLabelCategoryName', categorical.missingLabel));
|
< 0.001 | 2 | 452 | elseif matches("",valueSet)
|
| | 453 | error(message('MATLAB:categorical:EmptyCategoryName'));
|
< 0.001 | 2 | 454 | end
|
| | 455 | % Don't try to make names out of things that aren't character vectors.
|
< 0.001 | 2 | 456 | if ~all(cellfun('size',valueSet,1) == 1)
|
| | 457 | error(message('MATLAB:categorical:CantCreateCategoryNames'));
|
< 0.001 | 2 | 458 | end
|
< 0.001 | 2 | 459 | b.categoryNames = valueSet(:);
|
< 0.001 | 1 | 460 | elseif isstringValueSet
|
| | 461 | % Similar to cellstr case
|
| | 462 | % We will not attempt to create a name for the empty string or
|
| | 463 | % the undefined categorical label. Names must be given explicitly.
|
< 0.001 | 1 | 464 | if any(valueSet == categorical.undefLabel)
|
| | 465 | error(message('MATLAB:categorical:UndefinedLabelCategoryName', categorical.undefLabel));
|
< 0.001 | 1 | 466 | elseif any(valueSet == categorical.missingLabel)
|
| | 467 | error(message('MATLAB:categorical:UndefinedLabelCategoryName', categorical.missingLabel));
|
< 0.001 | 1 | 468 | elseif any(valueSet=='')
|
| | 469 | error(message('MATLAB:categorical:EmptyCategoryName'));
|
| 1 | 470 | end
|
< 0.001 | 1 | 471 | b.categoryNames = cellstr(valueSet);
|
| | 472 | elseif isa(valueSet,'categorical')
|
| | 473 | % We will not attempt to create a name for an undefined
|
| | 474 | % categorical element. Names must be given explicitly.
|
| | 475 | if any(isundefined(valueSet))
|
| | 476 | error(message('MATLAB:categorical:UndefinedInValueset'));
|
| | 477 | end
|
| | 478 | bnames = cellstr(valueSet); % can't use categorical subscripting to
|
| | 479 | b.categoryNames = bnames(:); % get a col, force the cellstr instead
|
| | 480 | else
|
| | 481 | % Anything else that has a char method
|
| | 482 | try
|
| | 483 | charcats = char(valueSet); % valueSet a column vector
|
| | 484 | catch ME
|
| | 485 | if suppliedValueSet
|
| | 486 | m = message('MATLAB:categorical:CharMethodFailedValueset');
|
| | 487 | else
|
| | 488 | m = message('MATLAB:categorical:CharMethodFailedData');
|
| | 489 | end
|
| | 490 | throw(addCause(MException(m),ME));
|
| | 491 | end
|
| | 492 | if ~ischar(charcats) || (size(charcats,1) ~= numel(valueSet))
|
| | 493 | if suppliedValueSet
|
| | 494 | error(message('MATLAB:categorical:CharMethodFailedValuesetNumRows'));
|
| | 495 | else
|
| | 496 | error(message('MATLAB:categorical:CharMethodFailedDataNumRows'));
|
| | 497 | end
|
| | 498 | end
|
| | 499 |
|
| | 500 | catNames = strtrim(cellstr(charcats));
|
| | 501 | if length(unique(catNames)) ~= numel(valueSet)
|
| | 502 | if isa(valueSet,'datetime')
|
| | 503 | valueSet.Format = 'default';
|
| | 504 | catNames = strtrim(cellstr(valueSet));
|
| | 505 | if length(unique(catNames)) ~= numel(valueSet)
|
| | 506 | error(message('MATLAB:categorical:DuplicatedCatNamesDatetime'));
|
| | 507 | end
|
| | 508 | elseif isa(valueSet,'duration')
|
| | 509 | error(message('MATLAB:categorical:DuplicatedCatNamesDuration'));
|
| | 510 | else
|
| | 511 | error(message('MATLAB:categorical:DuplicatedCatNames'));
|
| | 512 | end
|
| | 513 | end
|
| | 514 | b.categoryNames = catNames;
|
< 0.001 | 3 | 515 | end
|
< 0.001 | 3 | 516 | end
|
| | 517 |
|
| | 518 | % Assign category codes to each element of output
|
< 0.001 | 3 | 519 | codes = zeros(size(inputData),categorical.defaultCodesClass); % small as possible
|
< 0.001 | 3 | 520 | b.codes = categorical.castCodes(codes,length(b.categoryNames)); % only as big as needed
|
< 0.001 | 3 | 521 | if ~suppliedValueSet
|
| | 522 | % If we already have indices into categories because it was created by
|
| | 523 | % calling unique(inputData), use those and save a call to ismember.
|
< 0.001 | 2 | 524 | b.codes(:) = icats(:);
|
< 0.001 | 1 | 525 | else
|
< 0.001 | 1 | 526 | if isnumeric(inputData)
|
| | 527 | if ~isnumeric(valueSet)
|
| | 528 | error(message('MATLAB:categorical:NumericTypeMismatchValueSet'));
|
| | 529 | end
|
| | 530 | [~,b.codes(:)] = ismember(inputData,valueSet);
|
| | 531 | % NaN may have been given explicitly as a category, but there's
|
| | 532 | % at most one by now
|
| | 533 | if any(isnan(valueSet))
|
| | 534 | b.codes(isnan(inputData)) = find(isnan(valueSet));
|
| | 535 | end
|
< 0.001 | 1 | 536 | elseif islogical(inputData)
|
| | 537 | if islogical(valueSet)
|
| | 538 | % OK, nothing to do
|
| | 539 | elseif isnumeric(valueSet)
|
| | 540 | valueSet = logical(valueSet);
|
| | 541 | else
|
| | 542 | error(message('MATLAB:categorical:TypeMismatchValueset'));
|
| | 543 | end
|
| | 544 | trueCode = find(valueSet);
|
| | 545 | falseCode = find(~valueSet);
|
| | 546 | % Already checked that valueSet contains unique values, but
|
| | 547 | % still need to make sure it has at most one non-zero.
|
| | 548 | if length(trueCode) > 1
|
| | 549 | error(message('MATLAB:categorical:DuplicatedLogicalValueset'));
|
| | 550 | end
|
| | 551 | if ~isempty(trueCode), b.codes(inputData) = trueCode; end
|
| | 552 | if ~isempty(falseCode), b.codes(~inputData) = falseCode; end
|
< 0.001 | 1 | 553 | elseif iscellstrInput
|
| | 554 | if ~(iscellstrValueSet||isstringValueSet) % ismember requires that both inputs be of the same type
|
| | 555 | error(message('MATLAB:categorical:TypeMismatchValueset'));
|
| | 556 | end
|
| | 557 | % inputData and valueSet have already had leading/trailing spaces removed
|
| | 558 | [~,b.codes(:)] = ismember(inputData,valueSet);
|
< 0.001 | 1 | 559 | elseif isstringInput
|
< 0.001 | 1 | 560 | if ~(iscellstrValueSet||isstringValueSet) % ismember requires that both inputs be of the same type
|
| | 561 | error(message('MATLAB:categorical:TypeMismatchValueset'));
|
< 0.001 | 1 | 562 | end
|
| | 563 | % inputData and valueSet have already had leading/trailing spaces removed
|
0.005 | 1 | 564 | [~,b.codes(:)] = ismember(inputData,valueSet);
|
< 0.001 | 1 | 565 | if any(ismissing(valueSet))
|
| | 566 | b.codes(ismissing(inputData)) = find(ismissing(valueSet));
|
< 0.001 | 1 | 567 | end
|
| | 568 |
|
| | 569 | elseif isa(inputData,'categorical')
|
| | 570 | % This could be done in the generic case that follows, but this
|
| | 571 | % should be faster.
|
| | 572 | convert = zeros(1,length(inputData.categoryNames)+1,'like',b.codes);
|
| | 573 | if isa(valueSet,class(inputData))
|
| | 574 | undef = find(isundefined(valueSet)); % at most 1 by now
|
| | 575 | if ~isempty(undef), convert(1) = undef(1); end
|
| | 576 | valueSet = cellstr(valueSet); iscellstrValueSet = true; %#ok<NASGU>
|
| | 577 | elseif iscellstrValueSet || isstringValueSet
|
| | 578 | % Leave them alone
|
| | 579 | else
|
| | 580 | error(message('MATLAB:categorical:TypeMismatchValueset'));
|
| | 581 | end
|
| | 582 | [~,convert(2:end)] = ismember(inputData.categoryNames,valueSet);
|
| | 583 | b.codes(:) = reshape(convert(inputData.codes+1), size(inputData.codes));
|
| | 584 | else % anything else that has an eq method, except char (already weeded out)
|
| | 585 | if ~isa(valueSet,class(inputData))
|
| | 586 | error(message('MATLAB:categorical:TypeMismatchValueset'));
|
| | 587 | end
|
| | 588 | try
|
| | 589 | for i = 1:length(valueSet)
|
| | 590 | b.codes(inputData==valueSet(i)) = i;
|
| | 591 | end
|
| | 592 | catch ME
|
| | 593 | throw(addCause(MException(message('MATLAB:categorical:EQMethodFailedDataValueset')),ME));
|
| | 594 | end
|
< 0.001 | 1 | 595 | end
|
< 0.001 | 3 | 596 | end
|
| | 597 |
|
| | 598 | % Merge categories that were given identical names.
|
< 0.001 | 3 | 599 | if mergingCategories
|
| | 600 | b.codes(:) = reshape(mergeConvert(b.codes+1),size(b.codes)); % Cast to type of b.codes
|
| 3 | 601 | end
|
| | 602 |
|
< 0.001 | 3 | 603 | end % categorical constructor
|
Other subfunctions in this file are not included in this listing.