Sunday, February 24, 2019

Algorithm refresher


QuckSort

see also

var a = [3,2,1,0,-1,-2,11,9,-1];

var partition = function(a,lo,hi) {
  var pivot = a[ hi ];
  var i=lo-1;
  var j=hi+1;
  while (true) {
    do {
      ++i;
    } while( a[i] < pivot)
    do {
      --j;
    } while( a[j] > pivot)
    
    if( i>j) return j;
    // swap
    tmp = a[i]; 
    a[i] = a[j];
    a[j] = tmp;
  }
}

var quicksort = function(a,lo,hi) {
  if( lo < hi ) {
    pivot = partition(a, lo, hi);
    quicksort(a, lo, pivot)
    quicksort(a, pivot+1, hi)
  }
}

quicksort(a,0,a.length-1);

console.log(a);

console.log(qs(a,0,a.length));

Mergesort


var a = [3,2,1,0,-1,-2,11,9,-1];

var merge = function(left, right) {
    var i=0,j=0,ll=left.length,lr=right.length;
    var r = [];
    while(i < ll && j < lr) {
     if(left[i] < right[j]) {
         r.push(left[i]);
            ++i;
        } else {
         r.push(right[j]);
            ++j;
        }
    }
    for(;i < ll;i++) r.push(left[i]);
    for(;j < lr;j++) r.push(right[j]);
 return r;
}

var mergesort = funtion( a) {
    var len = a.length;
    if(len < 2) return a;
    var mid = Math.floor(len/2);
    var left = a.slice(0,mid);
    var right = a.slice(mid);
    return merge( mergesort(left), mergersort(right) );
}

console.log( mergesort(a));

Simple shunting yard algorithm in Kotlin


/**
 * You can edit, run, and share this code. 
 * play.kotlinlang.org 
 */

fun main() {
    val expr = listOf(1,'+',2,'*','(',6, '+', 1,')', '*', 2)
    val t = syard(expr)
    println("expr $t")
}


fun syard(expr: List): MutableList {
    val stack = java.util.Stack()
    val output = mutableListOf()
    
    expr.forEach {
        when(it) {
            '(' -> stack.push('(')
            ')' -> {
                var t = stack.pop()
                while( t != '(') {
                    output.add(t)
                    t = stack.pop()
                }
            }
            '+' -> {
                while(stack.isNotEmpty() && (stack.peek() == '*' || stack.peek() == '/')) {
                    output.add(stack.pop())
                }
                stack.push('+')
            }
            '-' -> {
                while(stack.isNotEmpty() && (stack.peek() == '*' || stack.peek() == '/')) {
                    output.add(stack.pop())
                }
                stack.push('-')
            }
            '*' -> stack.push('*')
            '/' -> stack.push('/')
            else -> output.add(it)
        }
    }
    
    while(stack.isNotEmpty()) {
        output.add( stack.pop() )
    }
    
    return output
}

Thursday, August 23, 2018

Curring in javascript



function add(n) {
    v = function( nn) { return add(n + nn) }
    v.valueOf = function() { return(n) }
    return v
}

console.log(add(1)(2)(10))


support any number of parameters

Sunday, October 26, 2014

Audio Signal Processing for Music Applications / Week 2: Discrete Fourier transform

DFT
`X[k]=sum_(n=0)^(N-1)x[n]e^(-j2 pi kn // N) k=0,...,N-1`
n: discrete time index (normalized time, T=1)
k: discrete frequency index
`omega_k = 2 pi k // N` : frequency in radians
`f_k = f_s k // N`: frequency in Hz ( `f_s` - sampling rate)
`=>`


Complex DFT
`X[k]=sum_(n=0)^(N-1)e^(j2 pi f_0 n + phi)e^(-j2 pi kn // N) k=0,...,N-1`
`=>` ` e^(j phi) (1-e^(-j2 pi (k // N - f_0) N)) / (1-e^(-j2 pi (k // N - f_0))) `

def DFT(x):
    """
    Input:
        x (numpy array) = input sequence of length N
    Output:
        The function should return a numpy array of length N
        X (numpy array) = The N point DFT of the input sequence x
    """
    ## Your code here
    N = len(x)
    r = -1j*2*np.pi/N
    iterable = (sum((x[n]*np.exp( r*n*k) for n in range(N) ), 0.0) for k in range(N))
    return np.fromiter( iterable, np.complex)

IDFT
`x[n]= 1/N sum_(k=0)^(N-1)X[k]e^(j2 pi kn // N) n=0,...,N-1`

def DFT(x):
    """
    Input:
        x (numpy array) = input sequence of length N
    Output:
        The function should return a numpy array of length N
        X (numpy array) = The N point DFT of the input sequence x
    """
    ## Your code here
    N = len(x)
    r = -1j*2*np.pi/N
    iterable = (sum((x[n]*np.exp( r*n*k) for n in range(N) ), 0.0) for k in range(N))
    return np.fromiter( iterable, np.complex)

IDFT for real signals
`X[k]=|X[k]|e^(j < X[k])` and `X[-k]=|X[k]| e^(-j < X[k])` for k=0,1,...N/2-1


def IDFT(X):
    """
    Input:
        X (numpy array) = frequency spectrum (length N)
    Output:
        The function should return a numpy array of length N 
        x (numpy array) = The N point IDFT of the frequency spectrum X
    """
    ## Your code here
    N = len(X)
    r = 1j*2*np.pi/N
    iterable = (sum((X[k]*np.exp( r*n*k) for k in range(N) ), 0.0)/N for n in range(N))
    return np.fromiter( iterable, np.complex)


Upd. https://github.com/MTG/sms-tools

Friday, October 24, 2014

Week 4 of MofMD. Recommender Systems

Two major classes
  • Content based
  • Collaborative filtering
  • Latent factor based
Discussion on "Long Tail" effect. Some products (less popular) may mot exists in physical stores because it's not reasonable to stock them.

Formal model:
C - set of Customers
S - set of Items

Utility function u: C x S -> R (set of Ratings)
R - totally ordered
Key problems:
  • Gathering "known" ratings for matrix
    • Explicit (ask people, don't scale)
    • Implicit (learn from actions, hard to learn low ratings)
  • Extrapolate unknown ratings from the known ones (mostly interested in high unknown ratings)
    • Sparsity
    • Cold start: new items have no ratings, new users have no history
  • Evaluating extrapolation methods

Content-based recommendations

Main idea to recommend customer x items similar to  previous items highly recommended by customer x
Item profiles - set of "important" features
Text features - set of "important" words
Extract using heuristics like TF-IDF (Term frequency * Inverse Doc Frequency)
`f_{ij}` - frequency of ter (feature) `i` in doc (item) `j`
`TF_{ij}= f_{ij}/{max_kf_{ij}}`
`n_j` - number of docs that mention term `i`
`N`- total number of docs
`IDF_i=log(N/n_i)` TF-IDF score: `w_{ij}=TF_{ij} xx IDF_i`
Doc profile - set of words with highest TF-IDF scores (together with scores)
I.e. word is frequent in (small) set of document - it's important
Steps include building user profile as sort of aggregated features of items and estimate `U(x,i)=cos(theta)=(x*i)/(|x||i|)` where x is User profile, i Item profile. Aggregation can be mean or normalized mean of features of items.
Pros:
  • No need for data on other users
  • Able to recommend to users with unique tastes
  • Able to recommend new & unpopular items
  • Explanation for recommended items
Cons:
  • Hard to find appropriate features
  • Overspecialization - never recommend items outside user's profile, unable to exploit quality judgments of other users
  • Cold start problem - how to build user profile

Collaborative filtering

Main idea to find N other users whose rating is similar to x's rating and estimate x's rating based on ratings of users in N
Similarity between users
  • Jaccard similarity `sim(A,B)=|r_A nn r_B|/|r_A uu r_B|` doesn't work well.
  • `sim(A,B)= cos ( r_A, r_B )`
  • Centered cosine is better `sim(A,B)= cos ( r_A, r_B )` - center user's rating around average value.( Make 0 average user's rating)
Rating Predictions
Let `r_x` be the vector of user x's ratings
Let N be the set of k users most similar to x who also rated item i
Prediction for user x and item i:
Option 1: `r_{:x i:} = 1/k sum _{:y in N:}r_{:yi:}` - doesn't take in account "degree of similarity" of users
Option 2: `r_{:x i:} = 1/k sum _{:y in N:}s_{:xy:}r_{:yi:} // sum _{:y in N:}s_{:xy:}` where `s_{:xy:} = sim(x,y)`

Another version is Item-Item filtering (User-User described right above)
  • For item i find other similar items
  • Estimate rating for item i based on ratings for similar items
  • Can use same similarity metrics and prediction functions as in user-user model
`r_{:x i:} = {sum _{:j in N (i;x):}s_{:ij:}r_{:xj:}} / {sum _{:j in N(i;x):}s_{:ij:}}` where `s_{:ij:}` similarity of items i and j `r_{:xj:}` rating of user x on item `N(i;x)` set items rated by x similar to i

In practice item-item outperforms user-user in many use cases. - Items are "simpler" than users. Items similarity is more meaningful than user similarity

Common practice implementing collaborative filtering - some combination of recommendation system plus filtering
`r_{:x i:} = b_{:x i:}+{sum _{:j in N (i;x):}s_{:ij:}(r_{:xj:}-b_{:xj:})} / {sum _{:j in N(i;x):}s_{:ij:}}` where `b_{:x i:} = mu + b_x + b_i`, `mu` - overall mean movie rating (example of movie database), `b_x` - rating deviation of user  x = (avg rating of user x) - `mu`, `b_i` - rating deviation of movie i

Evaluating Recommender system
Predict ratings on test set and evaluate RMSE
`sqrt({sum_{(x,i) in T}(r_{x i} - r_{x i}^**)^2} / N)`

`N=|T|`, `r_{x I}` the predicted rating, `r_{x i}^**` the actual rating.

Problem with RMSE - Prediction Diversity (all predictions too similar to each other), Prediction Context (user might operate in different context => want different results), Order of Prediction
We care only to predict high ratings - RMSE might penalize method that does well for high ratings and badly with others, Alternative : precision at top k i.e. percentage of prediction in user's top k ratings.

Latent Factor Models

Global effects => like Baseline Estimation
Local neighborhood (CF/NN) => Final estimate
Collaborative filtering - derive unknown rating from k-nearest neighbors like
`hat r_{x i} = (sum_{j in N(i;x)}S_{ij}*r_{xj})/(sum_{j in N(i;x)}S_{ij})`
`s_{ij}` similarity of items i and j
`r_{uj}` rating of user x on item j
`N(i;x)` set of items similar to item i that were rated by x


Combine with global effects
`hat r_{x i} = b_{x i} + (sum_{j in N(i;x)}S_{ij}*(r_{xj}-b_{x j}))/(sum_{j in N(i;x)}S_{ij})`
`b_{x i}=mu+b_x + b_i`
`mu` - overall mean rating
`b_x` - rating deviation of user  x
`b_i` - rating deviation of item i

Problems:
1) Similarity are arbitrary
2) Pairwise similarities neglect interdependencies among users
3) Taking weighted average can be restricting


SVD
A - input data matrix
U - left singular vec
V - right singular vec
`Sigma` - singular values

`A~~U Sigma V^T` => "SVD" `R~~Q*P^T`, A=R, Q=U, `P^T=Sigma V^T`
R - utility matrix approximated by Q and P matrices of lower dimensions

SVS minimizes reconstruction error
`SSE = {::}_{U,V,Sigma}^"min" sum_{i,j in A}(A_{ij}-[U Sigma V^T]_{ij})^2`
SSE - Sum of Squared Errors
SSE and MSE are monotonically related `RMSE = 1/{const} sqrt(SSE)` => also minimizing RMSE
But R has missing entries!

So modify SVD to find P,Q such as
`{::}_{P,Q}^"min" sum_{(i,x in R)}(r_{"xi"}-q_i * p_x^T)^2` (use elements with defined scores)
also
  • we don't require P,Q to be orthogonal/unit length
  • P,Q map users/movies to latent space
We want a large k, but if k>2 SSE on training data will increase, because model will adapt noise. So we may use regularization like
`{::}_{P,Q}^"min" sum_{(i,x in R)}(r_{"xi"}-q_i * p_x^T)^2+ lambda[ sum_x||p_x||^2+sum_i||q_i||^2]` 
and use gradient descent

Initialize P and Q (using SVD - pretend missed ratings are 0)
Do gradient descent
  • `P larr P - eta * gradP`
  • `Q larr Q - eta * gradQ`
where `gradQ = [gradq_{ik}]` and `gradq_{ik}=sum_"xi"-2(r_"xi"-q_ip_x^T)p_"xk"+2lambdaq_"ik"`and similary for `gradP`

Modeling biases and interactions
`hat r_"xi" = mu + b_x + b_i + q_i*p_x^T`
where
'mu' - overall mean rating
`b_x` - bias for user x
`b_i` - bias for movie i
`q_i*p_x^T` - user movie interaction

and we can fit a new model
`{::}_{P,Q}^"min" sum_{(i,x in R)}(r_{"xi"}- (mu + b_x + b_i + q_i*p_x^T))^2` `+ lambda[ sum_x||p_x||^2+sum_i||q_i||^2+sum_x||b_x||^2+sum_i||b_i||^2]` 
where `b_u` and `b_i` become additional parameters for the estimation

Dimensionality Reduction

Matrix rank - number of lineary independent columns/rows. The amount of coordinates can be reduced to rank.

Why reduce?
  • Discover hidden correlation/topics
  • Remove redundant and noisy features
  • Interpretation and visualization
  • Easier storage and processing of data

SVD definition

`A_[m x n] = U_[m x r] Sigma_[r x r] (V_[n x r])^T`
A: input matrix (say, m documents, n terms)
U: Left singular martix m x n matrix (say, m documents, r concepts)
`Sigma` Singular values, r x r diagonal matrix, values on diagonal sorted in decreasing order ("strength of each concept")
V: Right singular vector n x r matrix ( n terms, r concepts)

`A~~USigmaV^T = sum_isigma_iu_inu_i^T`

Decomposition is always possible, matrices are unique, U and  V columns orthonormal ( `U^TU=I`, `V^TV=I`, columns are orthogonal unit vectors), `Sigma` diagonal, singular values are positive and sorted in decreasing order

We can remove minor (tail `nu`).
Frobenius norm is minimal `||M||_F=sqrt(Sigma_"ij"M_"ij^2")`

SVD gives best low rank approximation
Rule-of-a thumb - Keep 80-90% of 'energy' (``=sumsigma_i^2)
SVD complexity O(nm^2) or O(n^2m)whichever is less but less work if we just nee a singular values, or k first singular vectors, or the matrix is sparse


Query SVD
`q_"concept"= q V` - transform query q to "concept" space and it will give a matrix of "strength" of each concept

Relation to Eigen-decomposition
`A=XLambdaX^T`
  • A is symmetric
  • U,V,X are orthonormal (`U^TU=I`)
  • `Lambda, Sigma` are diagonal
or diagonal matrix `lambda_i=sigma_i^2`

So
+ Optimal low rank approximation
- Interpretability problem
- Lack of sparsity (singular vectors are dense)

CUR decomposition
Goal - express A as product of C,U,R matrices and keep `||A - C*U*R||_F` small

"Constraints" on U - contains set of columns from A, similarly on R - set of rows from A

In practice we can choose about 4K rows/cols and do almost as good as SVD does.

See book for details.

Pros:
Easy interpretation
Sparcity
Cons:
Rows and columns with large norm may be choosen few times (probabilistic method)




P.S. ASCIIMathML

Monday, August 19, 2013

Brief review of Competitive Strategy course on Coursera

Week 1
Review of simultaneous games in a form of payoff matrix. Definition of a dominating strategy (no other strategy can do better) and domination strategy elimination as a solution for the game. Nash equilibrium (a combination of strategies no player can deviate unilaterally from his current strategy to improve his payoff). Prisoner's dilemma as example. Sequential games in a form of decision tree, backward induction as a way to solve game. Credible threats and how to make threats credible.
Week 2
Reasons for cooperation. Sometime competition is not a in best interest of firm. Ways to ensure cooperation infinite repetition and commitment. Factors enhancing cooperation: high value of a future payoffs. Factors hindering cooperation: large number of competitors, low punishment. Aggressive commitment. Cooperative commitment in a form of reputation building (make the company known as reliable cooperator, make it known that you treat others fairly), self-binding commitment (make investment that convinces potential partners that one is committed to act cooperatively), most favored customer clause (promises the customer that it will get refunded if another customer i charged a lower price in the future - so price reduction become not an option)
Week 3
Complements. Reason to support others. Reasons to produce complements -  cross subsidies, bundling, increase lock-in.Strategic partnership in a form of organisational or/and economical integration.
Week 4
Choice of market: Market attractiveness, structural and strategic barriers.
Entry strategies: Commitment/Value chain reconfiguration, Judo economics/Niche market.
Entry Deterrence: Structural entry barriers/Commitment, Limit pricing/Predatory pricing. Pre-emption.
Week 5
Stages of R&D. Types of innovations: process and product, drastic and incremental. Initiatives to innovate:  competitive market, monopolist, monopolist in a threat of entry.
Week 6
Bertrand paradox,. Vertical and horizontal differentiation. Generic strategies: cost leadership, differentiation, focus.

Friday, August 31, 2012

CUDA on Fedora 17

Finally have CUDA toolkit running on my Fedora 17 box.

Download Dev driver, Toolkit and SDK from http://developer.nvidia.com/cuda/cuda-downloads
Additional instructions are here http://www.r-tutor.com/gpu-computing/cuda-installation/cuda4.2-fc14
Tried to compile example from SDK - got gcc version incompatibility. Workaround is found here http://fedoraproject.org/wiki/Cuda#Fedora_17 :
Some compatibility problems appeared with gcc-4.7. You will have to install a compatibility version:
su -c 'yum install compat-gcc-34 compat-gcc-34-c++'
Create a symbolic link to make Cuda point to gcc-3.4:
su -c 'ln -s /usr/bin/gcc34 /opt/cuda/bin/gcc'
 Build shared libraries in ~/NVIDIA_GPU_Computing_SDK/shared
Build ~/NVIDIA_GPU_Computing_SDK/C/src/deviceQuery and run it in ~/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/deviceQuery
Finally got deviceQuery.txt with:

./deviceQuery Starting...

 CUDA Device Query (Runtime API) version (CUDART static linking)

Found 1 CUDA Capable device(s)

Device 0: "GeForce GT 430"
  CUDA Driver Version / Runtime Version          5.0 / 4.2
  CUDA Capability Major/Minor version number:    2.1
  Total amount of global memory:                 1023 MBytes (1072889856 bytes)
  ( 2) Multiprocessors x ( 48) CUDA Cores/MP:    96 CUDA Cores
  GPU Clock rate:                                1400 MHz (1.40 GHz)
  Memory Clock rate:                             800 Mhz
  Memory Bus Width:                              128-bit
  L2 Cache Size:                                 131072 bytes
  Max Texture Dimension Size (x,y,z)             1D=(65536), 2D=(65536,65535), 3D=(2048,2048,2048)
  Max Layered Texture Size (dim) x layers        1D=(16384) x 2048, 2D=(16384,16384) x 2048
  Total amount of constant memory:               65536 bytes
  Total amount of shared memory per block:       49152 bytes
  Total number of registers available per block: 32768
  Warp size:                                     32
  Maximum number of threads per multiprocessor:  1536
  Maximum number of threads per block:           1024
  Maximum sizes of each dimension of a block:    1024 x 1024 x 64
  Maximum sizes of each dimension of a grid:     65535 x 65535 x 65535
  Maximum memory pitch:                          2147483647 bytes
  Texture alignment:                             512 bytes
  Concurrent copy and execution:                 Yes with 1 copy engine(s)
  Run time limit on kernels:                     Yes
  Integrated GPU sharing Host Memory:            No
  Support host page-locked memory mapping:       Yes
  Concurrent kernel execution:                   Yes
  Alignment requirement for Surfaces:            Yes
  Device has ECC support enabled:                No
  Device is using TCC driver mode:               No
  Device supports Unified Addressing (UVA):      Yes
  Device PCI Bus ID / PCI location ID:           1 / 0
  Compute Mode:
     < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >

deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 5.0, CUDA Runtime Version = 4.2, NumDevs = 1, Device = GeForce GT 430
Ready to roll :-)