Package org.carrot2.text.util

Examples of org.carrot2.text.util.MutableCharArray


   
            final ProcessingResult result = ctrl1.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);
            final ILexicalData data = result.getAttribute("english");

            assertTrue(data.isCommonWord(new MutableCharArray("uniquea")));
            assertFalse(data.isCommonWord(new MutableCharArray("uniqueb")));
        }

        // Create pooling controller, use tempDir2
        final Controller ctrl2 = ControllerFactory.createPooling();
        {
            ctrl2.init(ImmutableMap.<String, Object> of(resourceLookupKey,
                new ResourceLookup(new DirLocator(tempDir2.getPath()), classpathLocator)));
   
            final ProcessingResult result = ctrl2.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);
            final ILexicalData data = result.getAttribute("english");

            assertFalse(data.isCommonWord(new MutableCharArray("uniquea")));
            assertTrue(data.isCommonWord(new MutableCharArray("uniqueb")));
        }

        // Now, reuse the first controller, nothing should change.
        {
            final ProcessingResult result = ctrl1.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);
            final ILexicalData data = result.getAttribute("english");

            assertTrue(data.isCommonWord(new MutableCharArray("uniquea")));
            assertFalse(data.isCommonWord(new MutableCharArray("uniqueb")));
        }       
    }
View Full Code Here


            final ITokenizer tokenStream = createTokenStream();
            tokenStream.reset(new StringReader(testString));

            final ArrayList<TokenImage> tokens = new ArrayList<TokenImage>();
            short token;
            MutableCharArray buffer = new MutableCharArray();
            while ((token = tokenStream.nextToken()) >= 0)
            {
                tokenStream.setTermBuffer(buffer);
                tokens.add(new TokenImage(buffer.toString(), token));
            }

            for (int i = 0; i < tokens.size(); i++) {
            }       
View Full Code Here

    {
        final String [] testData = getCommonWordsTestData();
        for (String word : testData)
        {
            assertTrue(languageModel.getLexicalData().isCommonWord(
                new MutableCharArray(word)));
        }
    }
View Full Code Here

        if (context.hasLabels())
        {
            // Term-document matrix building and reduction
            final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(
                context);
            final ReducedVectorSpaceModelContext reducedVsmContext = new ReducedVectorSpaceModelContext(
                vsmContext);
            LingoProcessingContext lingoContext = new LingoProcessingContext(
                reducedVsmContext);

            matrixBuilder.buildTermDocumentMatrix(vsmContext);
View Full Code Here

          }
        },
       
        // Using the class loader directly because this time we want to omit the prefix
        new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
   
    this.controller.init(initAttributes);
    this.idFieldName = core.getSchema().getUniqueKeyField().getName();

    // Make sure the requested Carrot2 clustering algorithm class is available
View Full Code Here

                    .defaultLanguage(LanguageCode.ENGLISH);


                    File resourcesDir = new File(environment.configFile(), "carrot2/resources");

                    ResourceLookup resourceLookup = new ResourceLookup(new DirLocator(resourcesDir));

                    DefaultLexicalDataFactoryDescriptor.attributeBuilder(attributes)
                    .mergeResources(true);
                    LexicalDataLoaderDescriptor.attributeBuilder(attributes)
                    .resourceLookup(resourceLookup);
View Full Code Here

                  + carrot2ResourcesDir);
              final InputStream resourceStream = resourceLoader
                  .openResource(carrot2ResourcesDir + "/" + resource);
             
              log.info(resource + " loaded from " + carrot2ResourcesDir);
              final IResource foundResource = new IResource() {
                public InputStream open() throws IOException {
                  return resourceStream;
                }
              };
              return new IResource[] { foundResource };
View Full Code Here

    initAttributes.put("solrIndexSchema", core.getSchema());

    // Customize Carrot2's resource lookup to first look for resources
    // using Solr's resource loader. If that fails, try loading from the classpath.
    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
        .resourceLookup(new ResourceLookup(new IResourceLocator() {
          public IResource[] getAll(final String resource) {
            final SolrResourceLoader resourceLoader = core.getResourceLoader();
            final String carrot2ResourcesDir = resourceLoader.getConfigDir()
                + initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
            try {
View Full Code Here

    initAttributes.put("solrIndexSchema", core.getSchema());

    // Customize Carrot2's resource lookup to first look for resources
    // using Solr's resource loader. If that fails, try loading from the classpath.
    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
        .resourceLookup(new ResourceLookup(new IResourceLocator() {
          public IResource[] getAll(final String resource) {
            final SolrResourceLoader resourceLoader = core.getResourceLoader();
            final String carrot2ResourcesDir = resourceLoader.getConfigDir()
                + initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
            try {
View Full Code Here

                    .defaultLanguage(LanguageCode.ENGLISH);


                    File resourcesDir = new File(environment.configFile(), "carrot2/resources");

                    ResourceLookup resourceLookup = new ResourceLookup(new DirLocator(resourcesDir));

                    DefaultLexicalDataFactoryDescriptor.attributeBuilder(attributes)
                    .mergeResources(true);
                    LexicalDataLoaderDescriptor.attributeBuilder(attributes)
                    .resourceLookup(resourceLookup);
View Full Code Here

TOP

Related Classes of org.carrot2.text.util.MutableCharArray

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.