Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

String.cc

Go to the documentation of this file.
00001 /*
00002  * String.cc
00003  *
00004  * Smalltalk like class library for C++
00005  * Textual string.
00006  *
00007  * Copyright (c) 2003-6 Milan Cermak
00008  */
00009 /*
00010  * This library is free software; you can redistribute it and/or
00011  * modify it under the terms of the GNU Lesser General Public
00012  * License as published by the Free Software Foundation; either
00013  * version 2.1 of the License, or (at your option) any later version.
00014  *
00015  * This library is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018  * Lesser General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU Lesser General Public
00021  * License along with this library; if not, write to the Free Software
00022  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023  */
00024 #include <stdio.h>
00025 #include <string.h>
00026 #include <stdarg.h>
00027 
00028 #include <stlib/String.h>
00029 #include <stlib/Array.h>
00030 #include <stlib/ByteArray.h>
00031 #include <stlib/ByteString.h>
00032 #include <stlib/Character.h>
00033 #include <stlib/CharacterArray.h>
00034 #include <stlib/EncodedStream.h>
00035 #include <stlib/InternalEncodedStreamFactory.h>
00036 #include <stlib/Number.h>
00037 #include <stlib/OrderedCollection.h>
00038 #include <stlib/Stream.h>
00039 #include <stlib/TwoByteString.h>
00040 #include <stlib/Visitor.h>
00041 #include <stlib/WriteStream.h>
00042 #include <stlib/SOOBoundsError.h>
00043 
00044 String::String(long size)
00045 {
00046     if (size < 0) {
00047         fprintf(stderr, "Warning: Constructing String with negative size. Creating empty.\n");
00048         size = 0;
00049     }
00050     implementor = new ByteString(size);
00051 }
00052 
00053 String::String(const char *str)
00054 {
00055     if (str == NULL) {
00056         fprintf(stderr, "Warning: Constructing String from NULL argument. Creating empty.\n");
00057         implementor = new ByteString(0);
00058     } else {
00059         int length = strlen(str);
00060         implementor = new ByteString(length);
00061         for (int i = 0; i < length; i++) {
00062             if (str[i] < 0) {
00063                 (new Error(new String("Illegal character. 7-bit only accepted."),
00064                            new String(__PRETTY_FUNCTION__)))->raise();
00065             }
00066             implementor->put(i, Character::value((unsigned char) str[i]));
00067         }
00068     }
00069 }
00070 
00071 String::String(const ByteArray *byteArray)
00072 {
00073     long length = byteArray->size();
00074     const unsigned char *ba = byteArray->rawBytesReadOnly();
00075     implementor = new ByteString(length);
00076     for (long i = 0; i < length; i++) {
00077         implementor->put(i, Character::value(ba[i]));
00078     }
00079 }
00080 
00081 String::~String(void)
00082 {
00083     delete implementor;
00084 }
00085 
00086 /* Copying protocol */
00087 String::String(const String &origin)
00088 {
00089     long tally = origin.size();
00090     implementor = new ByteString(tally);
00091     for (long i = 0; i < tally; i++) {
00092                 put(i, dynamic_cast<Character *>(origin.at(i)));
00093     }
00094 }
00095 
00096 /* Class-accessing protocol */
00097 String *String::className(void) const
00098 {
00099     return new String("String");
00100 }
00101 
00102 /* Class-instance creation protocol */
00103 String *String::with(Character *object)
00104 {
00105     String *newString = new String(1);
00106     newString->put(0, object);
00107     return newString;
00108 }
00109 
00110 String *String::with(Character *obj1, Character *obj2)
00111 {
00112     String *newString = new String(2);
00113     newString->put(0, obj1);
00114     newString->put(1, obj2);
00115     return newString;
00116 }
00117 
00118 String *String::with(Character *obj1, Character *obj2, Character *obj3)
00119 {
00120     String *newString = new String(3);
00121     newString->put(0, obj1);
00122     newString->put(1, obj2);
00123     newString->put(2, obj3);
00124     return newString;
00125 }
00126 
00127 String *String::with(Character *obj1, Character *obj2, Character *obj3, Character *obj4)
00128 {
00129     String *newString = new String(4);
00130     newString->put(0, obj1);
00131     newString->put(1, obj2);
00132     newString->put(2, obj3);
00133     newString->put(3, obj4);
00134     return newString;
00135 }
00136 
00137 String *String::withAll(Collection *coll)
00138 {
00139     String *newString = new String(coll->size());
00140     int index = 0;
00141     for (Iterator *i = coll->iterator(); !i->finished(); i->next()) {
00142         newString->put(index++, i->value());
00143     }
00144     return newString;
00145 }
00146 
00147 /* Accessing protocol */
00148 Object *String::at(int index) const
00149 {
00150     if (index < 0 || index >= size())
00151         (new SubscriptOutOfBoundsError(__PRETTY_FUNCTION__, index))->raiseFrom(this);
00152     return implementor->at(index);
00153 }
00154 
00155 void String::put(int index, Object *obj)
00156 {
00157     if (obj == nil || !obj->isCharacter()) {
00158         error(new String("String stores only Characters."),
00159               new String(__PRETTY_FUNCTION__), obj);
00160     }
00161     if (index < 0 || index >= size())
00162         (new SubscriptOutOfBoundsError(__PRETTY_FUNCTION__, index))->raiseFrom(this);
00163 
00164     Character *chr = dynamic_cast<Character *>(obj);
00165 
00166     if (chr->byteSize() > implementor->byteSize()) {
00167         changeWideToFit(chr->byteSize());
00168     }
00169 
00170     implementor->put(index, chr);
00171 }
00172 
00173 void String::put(int index, char obj)
00174 {
00175     put(index, Character::value(obj));
00176 }
00177 
00178 long String::size(void) const
00179 {
00180     return implementor->size();
00181 }
00182 
00183 long String::indexOf(const char obj) const
00184 {
00185     return SequenceableCollection::indexOf(Character::value(obj));
00186 }
00187 
00188 long String::lastIndexOf(const char obj) const
00189 {
00190     return SequenceableCollection::lastIndexOf(Character::value(obj));
00191 }
00192 
00193 long String::nextIndexOf(const char element, long startIndex, long stopIndex) const
00194 {
00195     return SequenceableCollection::nextIndexOf(Character::value(element),
00196                                                startIndex, stopIndex);
00197 }
00198 
00199 long String::prevIndexOf(const char element, long startIndex, long stopIndex) const
00200 {
00201     return SequenceableCollection::prevIndexOf(Character::value(element),
00202                                                startIndex, stopIndex);
00203 }
00204 
00205 /* Adding protocol */
00206 void String::add(Object *)
00207 {
00208     shouldNotImplement(new String(__PRETTY_FUNCTION__));
00209 }
00210 
00211 void String::changeSize(long newSize)
00212 {
00213     if (newSize < 0) {
00214         fprintf(stderr, "Warning: Changing size of String to negative. Making zero instead.\n");
00215         newSize = 0;
00216     }
00217     implementor->changeSize(newSize);
00218 }
00219 
00220 /* Comparing protocol */
00221 long String::hash(void) const
00222 {
00223     long hashConst = 0x12345;
00224     int length = size();
00225 
00226     if (length > 0) {
00227         hashConst ^= ((Character *) at(0))->asInteger();
00228         if (length > 1) {
00229             hashConst ^= ((Character *) at(length-1))->asInteger();
00230             if (length > 3) {
00231                 hashConst ^= ((Character *) at(length/2-1))->asInteger()
00232                              ^ ((Character *) at(length/2+1))->asInteger();
00233             }
00234         }
00235     }
00236     return hashConst;
00237 }
00238 
00239 bool String::isEqual(const Object *object) const
00240 {
00241 /*    long length = size();
00242     if (object->isString() &&
00243         length == ((String *) object)->size()) {
00244         String *charArray = (String *) object;
00245         for (long i = 0; i < length; i++) {
00246             if (!at(i)->isEqual(charArray->at(i)))
00247                 return false;
00248         }
00249         return true;
00250     }
00251     return false;
00252 */
00253     if (!object->isString()) return false;
00254     String *otherOne = (String *) object;
00255     if (otherOne->size() != this->size()) return false;
00256     return this->implementor->isEqual(otherOne->implementor);
00257 }
00258 
00259 bool String::isEqual(const char *string) const
00260 {
00261     long i, length = size();
00262     for (i = 0; i < length && string[i] != 0; i++) {
00263         if (!((Character *) at(i))->isEqual(string[i]))
00264             return false;
00265     }
00266     if (i < length || string[i] != 0) return false;
00267     return true;
00268 }
00269 
00270 bool String::match(String *str)
00271 {
00272     return match(str, true);
00273 }
00274 
00275 bool String::match(const char *string)
00276 {
00277     return match(new String(string), true);
00278 }
00279 
00280 bool String::match(String *str, bool ignoreCase)
00281 {
00282     return str->matchesPattern(this, ignoreCase);
00283 }
00284 
00285 bool String::match(const char *str, bool ignoreCase)
00286 {
00287     return (new String(str))->matchesPattern(this, ignoreCase);
00288 }
00289 
00290 bool String::matchesPattern(String *pattern, bool ignoreCase)
00291 {
00292     long stringSize, stringIndex, stringStartScan;
00293     long patternSize, patternIndex, patternStartScan;
00294 
00295     stringSize = size();
00296     stringIndex = 0;
00297     patternSize = pattern->size();
00298     patternIndex = 0;
00299     stringStartScan = -1;
00300     patternStartScan = 0;
00301 
00302     while (patternIndex < patternSize) {
00303         Character *p = (Character *) pattern->at(patternIndex);
00304         patternIndex++;
00305         if (p->isEqual('*')) {
00306             /* We found a successful match after the last *, if any. */
00307             if (patternIndex >= patternSize)
00308                 return true;
00309             stringStartScan = stringIndex;
00310             patternStartScan = patternIndex;
00311         } else {
00312             if (stringIndex >= stringSize)
00313                 return false;
00314             Character *t = (Character *) at(stringIndex);
00315             stringIndex++;
00316             if (t->isEqual(p) || p->isEqual('#') ||
00317                 (ignoreCase && t->asUppercase()->isEqual(p->asUppercase()))) {
00318                 if (patternIndex >= patternSize && stringIndex < stringSize) {
00319                     if (stringStartScan < 0)
00320                         return false;
00321                     stringIndex = stringIndex + patternStartScan - patternIndex + 1;
00322                     patternIndex = patternStartScan;
00323                 }
00324             } else {
00325                 if (stringStartScan < 0)
00326                     return false;
00327                 stringIndex = stringIndex + patternStartScan - patternIndex + 1;
00328                 patternIndex = patternStartScan;
00329             }
00330         }
00331     }
00332 
00333     if (stringIndex < stringSize)
00334         return false;
00335     return true;
00336 }
00337 
00338 /* Converting protocol */
00339 char *String::asCString(void) const
00340 {
00341     Stream *stream = (new ByteArray(size()))->withEncoding("default")->writeStream();
00342     for (int i = 0; i < size(); i++) {
00343         try {
00344             stream->nextPut(at(i));
00345         }
00346         catch (Error *ex) {
00347             stream->nextPut('?');
00348         }
00349     }
00350     return dynamic_cast<ByteArray *>(stream->contents())->asCString();
00351 }
00352 
00353 String* String::format(const char* format, ...)
00354 {
00355     String *result;
00356     va_list ap;
00357 
00358     va_start(ap, format);
00359     result = formatArgumentsIntoString(format, ap);
00360     va_end(ap);
00361 
00362     return result;
00363 }
00364 
00365 String* String::format(const String* format, ...)
00366 {
00367     String *result;
00368     va_list ap;
00369 
00370     va_start(ap, format);
00371     result = formatArgumentsIntoString(format->asCString(), ap);
00372     va_end(ap);
00373     return result;
00374 }
00375 
00376 String::operator char*(void) const
00377 {
00378     return asCString();
00379 }
00380 
00381 Array *String::asArrayOfSubstrings(void)
00382 {
00383     return asArrayOfSubstringsSeparatedBy(Character::space());
00384 }
00385 
00386 Array *String::asArrayOfSubstringsSeparatedBy(Character *ch)
00387 {
00388     OrderedCollection *answer = new OrderedCollection;
00389     long startIndex = 0, stopIndex;
00390     long length = size();
00391 
00392     while (startIndex < length) {
00393         while (startIndex < length && at(startIndex) == ch) {
00394             startIndex++;
00395         }
00396         stopIndex = startIndex;
00397         while (stopIndex < length && at(stopIndex) != ch) {
00398             stopIndex++;
00399         }
00400         if (stopIndex > startIndex) {
00401             answer->add(copy(startIndex, stopIndex));
00402             startIndex = stopIndex;
00403         }
00404     }
00405     return answer->asArray();
00406 }
00407 
00408 Array *String::asArrayOfSubstringsSeparatedBy(const char ch)
00409 {
00410     return asArrayOfSubstringsSeparatedBy(Character::value(ch));
00411 }
00412 
00413 ByteArray *String::asByteArray(void) const
00414 {
00415     return asByteArrayEncoding("default");
00416 }
00417 
00418 ByteArray *String::asByteArrayEncoding(String *encoding) const
00419 {
00420     return ByteArray::fromStringEncoding(this, encoding);
00421 }
00422 
00423 ByteArray *String::asByteArrayEncoding(const char *encoding) const
00424 {
00425     return asByteArrayEncoding(new String(encoding));
00426 }
00427 
00428 String *String::asLowercase(void)
00429 {
00430     long length = size();
00431     String *newString = (String *) copyEmpty(length);
00432     for (long i = 0; i < length; i++) {
00433         newString->put(i, ((Character *) at(i))->asLowercase());
00434     }
00435     return newString;
00436 }
00437 
00438 Number *String::asNumber(void)
00439 {
00440     return Number::fromString(this);
00441 }
00442 
00443 Number *String::asNumberRadix(int radix)
00444 {
00445     return Number::fromStringRadix(this, radix);
00446 }
00447 
00448 String *String::asUppercase(void)
00449 {
00450     long length = size();
00451     String *newString = (String *) copyEmpty(length);
00452     for (long i = 0; i < length; i++) {
00453         newString->put(i, ((Character *) at(i))->asUppercase());
00454     }
00455     return newString;
00456 }
00457 
00458 String *String::asUppercaseFirst(void)
00459 {
00460     long length = size();
00461     if (length <= 0) return this;
00462     String *newString = (String *) copyEmpty(length);
00463     newString->put(0, ((Character *) at(0))->asUppercase());
00464     for (long i = 1; i < length; i++) {
00465         newString->put(i, ((Character *) at(i))->asLowercase());
00466     }
00467     return newString;
00468 }
00469 
00470 String *String::asString(void)
00471 {
00472     return this;
00473 }
00474 
00475 /* Copying protocol */
00476 Object *String::copy(void)
00477 {
00478     return new String(*this);
00479 }
00480 
00481 Object *String::copyEmpty(long size)
00482 {
00483     return new String(size);
00484 }
00485 
00486 SequenceableCollection &String::operator+(SequenceableCollection &collection)
00487 {
00488     return *copyReplace(size(), size(), &collection);
00489 }
00490 
00491 String &String::operator+(const char *string)
00492 {
00493     return *concatenateWith(string);
00494 }
00495 
00496 String *String::concatenateWith(String *string)
00497 {
00498     return (String *) copyReplace(size(), size(), string);
00499 }
00500 
00501 String *String::concatenateWith(const char *string)
00502 {
00503     return concatenateWith(new String(string));
00504 }
00505 
00506 /* Printing protocol */
00507 void String::printOn(Stream *stream) const
00508 {
00509     stream->nextPut('\"');
00510     stream->nextPutAll(this);
00511     stream->nextPut('\"');
00512 }
00513 
00514 /* Testing protocol */
00515 bool String::includes(const char element)
00516 {
00517     return includes(Character::value(element));
00518 }
00519 
00520 bool String::isString(void) const
00521 {
00522     return true;
00523 }
00524 
00525 bool String::isDigitString(void)
00526 {
00527     long length = size();
00528     for (long i = 0; i < length; i++) {
00529         if (!((Character *) at(i))->isDigit())
00530             return false;
00531     }
00532     return true;
00533 }
00534 
00535 bool String::startsWith(const char *str) const
00536 {
00537     return startsWith(new String(str));
00538 }
00539 
00540 bool String::endsWith(const char *str) const
00541 {
00542     return endsWith(new String(str));
00543 }
00544 
00545 bool String::isByteString(void) const
00546 {
00547     return implementor->isByteString();
00548 }
00549 
00550 bool String::isTwoByteString(void) const
00551 {
00552     return implementor->isTwoByteString();
00553 }
00554 
00555 bool String::isCharacterArray(void) const
00556 {
00557     return implementor->isCharacterArray();
00558 }
00559 
00560 /* Utilities protocol */
00561 String *String::trimBlanks(void)
00562 {
00563     String *separators = new String(" \t\n\r");
00564     long index = 0, length = size();
00565 
00566     if (length == 0) return this;
00567     while (index < length && separators->includes(this->at(index)))
00568         index++;
00569     while (index < length && separators->includes(this->at(length-1)))
00570         length--;
00571     if (index == length) return new String;
00572     if (index == 0 && length == size()) return this;
00573     return dynamic_cast<String *>(copy(index, length));
00574 }
00575 
00576 String *String::demangleMethodName(void)
00577 {
00578 #if __GNUC_PREREQ(3,0)
00579     Stream *stream = (new String(size()))->writeStream();
00580     long segmentLength = 3, segmentStart = 0;
00581     String *buffer = this;
00582     String *segment;
00583 
00584     /* Skip prefix */
00585     while (!dynamic_cast<Character *>(buffer->at(segmentStart + segmentLength))->isDigit()) {
00586         segmentLength++;
00587     }
00588 
00589     while (dynamic_cast<Character *>(buffer->at(segmentStart + segmentLength))->isDigit()) {
00590         if (buffer != this) stream->nextPutAll("::");
00591         buffer = dynamic_cast<String *>(buffer->copy(segmentLength + segmentStart, buffer->size()));
00592         segmentStart = 0;
00593         segmentLength = buffer->asNumber()->asLong();
00594         while (dynamic_cast<Character *>(buffer->at(segmentStart))->isDigit())
00595             segmentStart++;
00596         segment = dynamic_cast<String *>(buffer->copy(segmentStart, segmentStart + segmentLength));
00597         stream->nextPutAll(segment);
00598         if (buffer->at(segmentStart + segmentLength)->isEqual(Character::value('D'))) {
00599             stream->nextPutAll("::~");
00600             stream->nextPutAll(segment);
00601         }
00602     }
00603     if (buffer == this) {
00604         /* Can't demangle */
00605         return this;
00606     }
00607     stream->nextPutAll("()");
00608 
00609     return dynamic_cast<String *>(stream->contents());
00610 #else
00611     /* Don't know how to demangle */
00612     return this;
00613 #endif
00614 }
00615 
00616 /* Visiting protocol */
00617 void String::visitBy(Visitor *visitor)
00618 {
00619     visitor->visitString(this);
00620 }
00621 
00622 /* Private protocol */
00623 String *String::formatArgumentsIntoString(const char *format, va_list args)
00624 {
00625     char buf[1025];
00626 
00627     vsnprintf(buf, 1024, format, args);
00628     return (new ByteArray(buf, strlen(buf)))->asString();
00629 }
00630 
00631 void String::changeWideToFit(int width)
00632 {
00633     switch (width) {
00634     case 1 :
00635         implementor = implementor->asByteString();
00636         break;
00637     case 2 :
00638         implementor = implementor->asTwoByteString();
00639         break;
00640     case 4 :
00641         implementor = implementor->asCharacterArray();
00642         break;
00643     }
00644 }

Generated on Mon Nov 27 09:47:55 2006 for Smalltalk like C++ Class Library by  doxygen 1.4.2