Examples of org.carrot2.text.util.MutableCharArray

org.carrot2.util.resource.ResourceLookup
Implements {@link CharSequence} over a mutable char[] buffer.
This class implements proper content-based {@link #hashCode()} and {@link #equals(Object)} against other {@link MutableCharArray} objects, assuming the underlying character buffers do not change. If a buffer is changed, the resulting behavior is unpredictable.

    
            final ProcessingResult result = ctrl1.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);
            final ILexicalData data = result.getAttribute("english");


            assertTrue(data.isCommonWord(new MutableCharArray("uniquea")));
            assertFalse(data.isCommonWord(new MutableCharArray("uniqueb")));
        }


        // Create pooling controller, use tempDir2
        final Controller ctrl2 = ControllerFactory.createPooling();
        {
            ctrl2.init(ImmutableMap.<String, Object> of(resourceLookupKey, 
                new ResourceLookup(new DirLocator(tempDir2.getPath()), classpathLocator)));
    
            final ProcessingResult result = ctrl2.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);
            final ILexicalData data = result.getAttribute("english");


            assertFalse(data.isCommonWord(new MutableCharArray("uniquea")));
            assertTrue(data.isCommonWord(new MutableCharArray("uniqueb")));
        }


        // Now, reuse the first controller, nothing should change.
        {
            final ProcessingResult result = ctrl1.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);
            final ILexicalData data = result.getAttribute("english");


            assertTrue(data.isCommonWord(new MutableCharArray("uniquea")));
            assertFalse(data.isCommonWord(new MutableCharArray("uniqueb")));
        }        
    }

View Full Code Here

            final ITokenizer tokenStream = createTokenStream();
            tokenStream.reset(new StringReader(testString));


            final ArrayList<TokenImage> tokens = new ArrayList<TokenImage>();
            short token;
            MutableCharArray buffer = new MutableCharArray();
            while ((token = tokenStream.nextToken()) >= 0)
            {
                tokenStream.setTermBuffer(buffer);
                tokens.add(new TokenImage(buffer.toString(), token));
            }


            for (int i = 0; i < tokens.size(); i++) {
            }

View Full Code Here

    {
        final String [] testData = getCommonWordsTestData();
        for (String word : testData)
        {
            assertTrue(languageModel.getLexicalData().isCommonWord(
                new MutableCharArray(word)));
        }
    }

View Full Code Here

        if (context.hasLabels())
        {
            // Term-document matrix building and reduction
            final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(
                context);
            final ReducedVectorSpaceModelContext reducedVsmContext = new ReducedVectorSpaceModelContext(
                vsmContext);
            LingoProcessingContext lingoContext = new LingoProcessingContext(
                reducedVsmContext);


            matrixBuilder.buildTermDocumentMatrix(vsmContext);

View Full Code Here


          }
        },
        
        // Using the class loader directly because this time we want to omit the prefix 
        new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
    
    this.controller.init(initAttributes);
    this.idFieldName = core.getSchema().getUniqueKeyField().getName();


    // Make sure the requested Carrot2 clustering algorithm class is available

View Full Code Here

                    .defaultLanguage(LanguageCode.ENGLISH);




                    File resourcesDir = new File(environment.configFile(), "carrot2/resources");


                    ResourceLookup resourceLookup = new ResourceLookup(new DirLocator(resourcesDir));


                    DefaultLexicalDataFactoryDescriptor.attributeBuilder(attributes)
                    .mergeResources(true);
                    LexicalDataLoaderDescriptor.attributeBuilder(attributes)
                    .resourceLookup(resourceLookup);

View Full Code Here

                  + carrot2ResourcesDir);
              final InputStream resourceStream = resourceLoader
                  .openResource(carrot2ResourcesDir + "/" + resource);
              
              log.info(resource + " loaded from " + carrot2ResourcesDir);
              // Wrap the stream opened above in an anonymous IResource; open()
              // hands back that same stream instance on every call.
              // NOTE(review): a second open() call would presumably receive an
              // already-consumed stream; confirm callers open the resource only once.
              final IResource foundResource = new IResource() {
                public InputStream open() throws IOException {
                  return resourceStream;
                }
              };
              return new IResource[] { foundResource };

View Full Code Here

    initAttributes.put("solrIndexSchema", core.getSchema());


    // Customize Carrot2's resource lookup to first look for resources
    // using Solr's resource loader. If that fails, try loading from the classpath.
    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
        .resourceLookup(new ResourceLookup(new IResourceLocator() {
          public IResource[] getAll(final String resource) {
            final SolrResourceLoader resourceLoader = core.getResourceLoader();
            final String carrot2ResourcesDir = resourceLoader.getConfigDir()
                + initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
            try {

View Full Code Here

    initAttributes.put("solrIndexSchema", core.getSchema());


    // Customize Carrot2's resource lookup to first look for resources
    // using Solr's resource loader. If that fails, try loading from the classpath.
    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
        .resourceLookup(new ResourceLookup(new IResourceLocator() {
          public IResource[] getAll(final String resource) {
            final SolrResourceLoader resourceLoader = core.getResourceLoader();
            final String carrot2ResourcesDir = resourceLoader.getConfigDir()
                + initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
            try {

View Full Code Here

                    .defaultLanguage(LanguageCode.ENGLISH);




                    File resourcesDir = new File(environment.configFile(), "carrot2/resources");


                    ResourceLookup resourceLookup = new ResourceLookup(new DirLocator(resourcesDir));


                    DefaultLexicalDataFactoryDescriptor.attributeBuilder(attributes)
                    .mergeResources(true);
                    LexicalDataLoaderDescriptor.attributeBuilder(attributes)
                    .resourceLookup(resourceLookup);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.carrot2.text.util.MutableCharArray

com.carrotsearch.hppc.IntArrayList

com.carrotsearch.hppc.IntIntOpenHashMap

com.carrotsearch.hppc.IntStack

com.tamingtext.carrot2.Carrot2ExampleTest

org.apache.http.message.BasicNameValuePair

org.apache.lucene.search.IndexSearcher

org.apache.mahout.math.matrix.DoubleMatrix2D

org.carrot2.cli.batch.BatchApp

org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm

org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithmTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.